/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2017 Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk_cunit.h"

#include "common/lib/ut_multithread.c"
#include "unit/lib/json_mock.c"

#include "spdk/config.h"
/* HACK: disable VTune integration so the unit test doesn't need VTune headers and libs to build */
#undef SPDK_CONFIG_VTUNE
#include "bdev/bdev.c"

DEFINE_STUB(spdk_notify_send, uint64_t, (const char *type, const char *ctx), 0);
DEFINE_STUB(spdk_notify_type_register, struct spdk_notify_type *, (const char *type), NULL);
DEFINE_STUB(spdk_memory_domain_get_dma_device_id, const char *, (struct spdk_memory_domain *domain),
	    "test_domain");
DEFINE_STUB(spdk_memory_domain_get_dma_device_type, enum spdk_dma_device_type,
	    (struct spdk_memory_domain *domain), 0);

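/* Memory-domain data movement is mocked out: each function records that it
 * was invoked and completes the transfer synchronously with status 0, unless
 * a test forces a different return value through the mock framework.
 */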
static bool g_memory_domain_pull_data_called;
static bool g_memory_domain_push_data_called;

DEFINE_RETURN_MOCK(spdk_memory_domain_pull_data, int);
int
spdk_memory_domain_pull_data(struct spdk_memory_domain *src_domain, void *src_domain_ctx,
			     struct iovec *src_iov, uint32_t src_iov_cnt, struct iovec *dst_iov, uint32_t dst_iov_cnt,
			     spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
{
	g_memory_domain_pull_data_called = true;
	HANDLE_RETURN_MOCK(spdk_memory_domain_pull_data);
	cpl_cb(cpl_cb_arg, 0);
	return 0;
}

DEFINE_RETURN_MOCK(spdk_memory_domain_push_data, int);
int
spdk_memory_domain_push_data(struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
			     struct iovec *dst_iov, uint32_t dst_iovcnt, struct iovec *src_iov, uint32_t src_iovcnt,
			     spdk_memory_domain_data_cpl_cb cpl_cb, void *cpl_cb_arg)
{
	g_memory_domain_push_data_called = true;
	HANDLE_RETURN_MOCK(spdk_memory_domain_push_data);
	cpl_cb(cpl_cb_arg, 0);
	return 0;
}

int g_status;
int g_count;
enum spdk_bdev_event_type g_event_type1;
enum spdk_bdev_event_type g_event_type2;
enum spdk_bdev_event_type g_event_type3;
enum spdk_bdev_event_type g_event_type4;
struct spdk_histogram_data *g_histogram;
void *g_unregister_arg;
int g_unregister_rc;

void
spdk_scsi_nvme_translate(const struct spdk_bdev_io *bdev_io,
			 int *sc, int *sk, int *asc, int *ascq)
{
}

static int
null_init(void)
{
	return 0;
}

static int
null_clean(void)
{
	return 0;
}

static int
stub_destruct(void *ctx)
{
	return 0;
}

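/* A ut_expected_io describes one child I/O the stub backend should receive:
 * type, block offset/length, and (optionally) the exact iovecs and metadata
 * buffer. Tests queue these on the channel before submitting a request and
 * stub_submit_request() pops and checks them in order.
 */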
struct ut_expected_io {
	uint8_t				type;
	uint64_t			offset;
	uint64_t			src_offset;
	uint64_t			length;
	int				iovcnt;
	struct iovec			iov[SPDK_BDEV_IO_NUM_CHILD_IOV];
	void				*md_buf;
	TAILQ_ENTRY(ut_expected_io)	link;
};

struct bdev_ut_channel {
	TAILQ_HEAD(, spdk_bdev_io)	outstanding_io;
	uint32_t			outstanding_io_count;
	TAILQ_HEAD(, ut_expected_io)	expected_io;
};

static bool g_io_done;
static struct spdk_bdev_io *g_bdev_io;
static enum spdk_bdev_io_status g_io_status;
static enum spdk_bdev_io_status g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
static uint32_t g_bdev_ut_io_device;
static struct bdev_ut_channel *g_bdev_ut_channel;
static void *g_compare_read_buf;
static uint32_t g_compare_read_buf_len;
static void *g_compare_write_buf;
static uint32_t g_compare_write_buf_len;
static void *g_compare_md_buf;
static bool g_abort_done;
static enum spdk_bdev_io_status g_abort_status;
static void *g_zcopy_read_buf;
static uint32_t g_zcopy_read_buf_len;
static void *g_zcopy_write_buf;
static uint32_t g_zcopy_write_buf_len;
static struct spdk_bdev_io *g_zcopy_bdev_io;
static uint64_t g_seek_data_offset;
static uint64_t g_seek_hole_offset;
static uint64_t g_seek_offset;

static struct ut_expected_io *
ut_alloc_expected_io(uint8_t type, uint64_t offset, uint64_t length, int iovcnt)
{
	struct ut_expected_io *expected_io;

	expected_io = calloc(1, sizeof(*expected_io));
	SPDK_CU_ASSERT_FATAL(expected_io != NULL);

	expected_io->type = type;
	expected_io->offset = offset;
	expected_io->length = length;
	expected_io->iovcnt = iovcnt;

	return expected_io;
}

static struct ut_expected_io *
ut_alloc_expected_copy_io(uint8_t type, uint64_t offset, uint64_t src_offset, uint64_t length)
{
	struct ut_expected_io *expected_io;

	expected_io = calloc(1, sizeof(*expected_io));
	SPDK_CU_ASSERT_FATAL(expected_io != NULL);

	expected_io->type = type;
	expected_io->offset = offset;
	expected_io->src_offset = src_offset;
	expected_io->length = length;

	return expected_io;
}

static void
ut_expected_io_set_iov(struct ut_expected_io *expected_io, int pos, void *base, size_t len)
{
	expected_io->iov[pos].iov_base = base;
	expected_io->iov[pos].iov_len = len;
}

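/* The stub backend's submit hook. It emulates just enough of a real bdev
 * module for the tests: compare/read/write buffers, abort, zcopy start/end,
 * and seek results are handled inline; every I/O is then parked on the
 * channel's outstanding list until stub_complete_io() completes it.
 */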
static void
stub_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
{
	struct bdev_ut_channel *ch = spdk_io_channel_get_ctx(_ch);
	struct ut_expected_io *expected_io;
	struct iovec *iov, *expected_iov;
	struct spdk_bdev_io *bio_to_abort;
	int i;

	g_bdev_io = bdev_io;

	if (g_compare_read_buf && bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		uint32_t len = bdev_io->u.bdev.iovs[0].iov_len;

		CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
		CU_ASSERT(g_compare_read_buf_len == len);
		memcpy(bdev_io->u.bdev.iovs[0].iov_base, g_compare_read_buf, len);
		if (bdev_io->bdev->md_len && bdev_io->u.bdev.md_buf && g_compare_md_buf) {
			memcpy(bdev_io->u.bdev.md_buf, g_compare_md_buf,
			       bdev_io->bdev->md_len * bdev_io->u.bdev.num_blocks);
		}
	}

	if (g_compare_write_buf && bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		uint32_t len = bdev_io->u.bdev.iovs[0].iov_len;

		CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
		CU_ASSERT(g_compare_write_buf_len == len);
		memcpy(g_compare_write_buf, bdev_io->u.bdev.iovs[0].iov_base, len);
	}

	if (g_compare_read_buf && bdev_io->type == SPDK_BDEV_IO_TYPE_COMPARE) {
		uint32_t len = bdev_io->u.bdev.iovs[0].iov_len;

		CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
		CU_ASSERT(g_compare_read_buf_len == len);
		if (memcmp(bdev_io->u.bdev.iovs[0].iov_base, g_compare_read_buf, len)) {
			g_io_exp_status = SPDK_BDEV_IO_STATUS_MISCOMPARE;
		}
		if (bdev_io->u.bdev.md_buf &&
		    memcmp(bdev_io->u.bdev.md_buf, g_compare_md_buf,
			   bdev_io->bdev->md_len * bdev_io->u.bdev.num_blocks)) {
			g_io_exp_status = SPDK_BDEV_IO_STATUS_MISCOMPARE;
		}
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) {
		if (g_io_exp_status == SPDK_BDEV_IO_STATUS_SUCCESS) {
			TAILQ_FOREACH(bio_to_abort, &ch->outstanding_io, module_link) {
				if (bio_to_abort == bdev_io->u.abort.bio_to_abort) {
					TAILQ_REMOVE(&ch->outstanding_io, bio_to_abort, module_link);
					ch->outstanding_io_count--;
					spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_FAILED);
					break;
				}
			}
		}
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_ZCOPY) {
		if (bdev_io->u.bdev.zcopy.start) {
			g_zcopy_bdev_io = bdev_io;
			if (bdev_io->u.bdev.zcopy.populate) {
				/* Start of a read */
				CU_ASSERT(g_zcopy_read_buf != NULL);
				CU_ASSERT(g_zcopy_read_buf_len > 0);
				bdev_io->u.bdev.iovs[0].iov_base = g_zcopy_read_buf;
				bdev_io->u.bdev.iovs[0].iov_len = g_zcopy_read_buf_len;
				bdev_io->u.bdev.iovcnt = 1;
			} else {
				/* Start of a write */
				CU_ASSERT(g_zcopy_write_buf != NULL);
				CU_ASSERT(g_zcopy_write_buf_len > 0);
				bdev_io->u.bdev.iovs[0].iov_base = g_zcopy_write_buf;
				bdev_io->u.bdev.iovs[0].iov_len = g_zcopy_write_buf_len;
				bdev_io->u.bdev.iovcnt = 1;
			}
		} else {
			if (bdev_io->u.bdev.zcopy.commit) {
				/* End of write */
				CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_base == g_zcopy_write_buf);
				CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_len == g_zcopy_write_buf_len);
				CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
				g_zcopy_write_buf = NULL;
				g_zcopy_write_buf_len = 0;
			} else {
				/* End of read */
				CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_base == g_zcopy_read_buf);
				CU_ASSERT(bdev_io->u.bdev.iovs[0].iov_len == g_zcopy_read_buf_len);
				CU_ASSERT(bdev_io->u.bdev.iovcnt == 1);
				g_zcopy_read_buf = NULL;
				g_zcopy_read_buf_len = 0;
			}
		}
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_SEEK_DATA) {
		bdev_io->u.bdev.seek.offset = g_seek_data_offset;
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_SEEK_HOLE) {
		bdev_io->u.bdev.seek.offset = g_seek_hole_offset;
	}

	TAILQ_INSERT_TAIL(&ch->outstanding_io, bdev_io, module_link);
	ch->outstanding_io_count++;

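	/* If the test queued an expected-I/O descriptor, pop it and validate
	 * this submission against it: type, offsets, length, iovecs and
	 * metadata pointer.
	 */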
	expected_io = TAILQ_FIRST(&ch->expected_io);
	if (expected_io == NULL) {
		return;
	}
	TAILQ_REMOVE(&ch->expected_io, expected_io, link);

	if (expected_io->type != SPDK_BDEV_IO_TYPE_INVALID) {
		CU_ASSERT(bdev_io->type == expected_io->type);
	}

	if (expected_io->md_buf != NULL) {
		CU_ASSERT(expected_io->md_buf == bdev_io->u.bdev.md_buf);
	}

	if (expected_io->length == 0) {
		free(expected_io);
		return;
	}

	CU_ASSERT(expected_io->offset == bdev_io->u.bdev.offset_blocks);
	CU_ASSERT(expected_io->length == bdev_io->u.bdev.num_blocks);
	if (expected_io->type == SPDK_BDEV_IO_TYPE_COPY) {
		CU_ASSERT(expected_io->src_offset == bdev_io->u.bdev.copy.src_offset_blocks);
	}

	if (expected_io->iovcnt == 0) {
		free(expected_io);
		/* UNMAP, WRITE_ZEROES, FLUSH and COPY don't have iovs, so we can just return now. */
		return;
	}

	CU_ASSERT(expected_io->iovcnt == bdev_io->u.bdev.iovcnt);
	for (i = 0; i < expected_io->iovcnt; i++) {
		expected_iov = &expected_io->iov[i];
		if (bdev_io->internal.orig_iovcnt == 0) {
			iov = &bdev_io->u.bdev.iovs[i];
		} else {
			/* A bounce buffer is in use; compare against the
			 * caller's original iovecs.
			 */
			iov = &bdev_io->internal.orig_iovs[i];
		}
		CU_ASSERT(iov->iov_len == expected_iov->iov_len);
		CU_ASSERT(iov->iov_base == expected_iov->iov_base);
	}

	free(expected_io);
}

static void
stub_submit_request_get_buf_cb(struct spdk_io_channel *_ch,
			       struct spdk_bdev_io *bdev_io, bool success)
{
	CU_ASSERT(success == true);

	stub_submit_request(_ch, bdev_io);
}

static void
stub_submit_request_get_buf(struct spdk_io_channel *_ch, struct spdk_bdev_io *bdev_io)
{
	spdk_bdev_io_get_buf(bdev_io, stub_submit_request_get_buf_cb,
			     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
}

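/* Complete up to num_to_complete I/Os from the channel's outstanding list,
 * oldest first, using the status a test set in g_io_exp_status. Returns how
 * many were actually completed, so tests can assert on partial progress.
 */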
static uint32_t
stub_complete_io(uint32_t num_to_complete)
{
	struct bdev_ut_channel *ch = g_bdev_ut_channel;
	struct spdk_bdev_io *bdev_io;
	enum spdk_bdev_io_status io_status;
	uint32_t num_completed = 0;

	while (num_completed < num_to_complete) {
		if (TAILQ_EMPTY(&ch->outstanding_io)) {
			break;
		}
		bdev_io = TAILQ_FIRST(&ch->outstanding_io);
		TAILQ_REMOVE(&ch->outstanding_io, bdev_io, module_link);
		ch->outstanding_io_count--;
		io_status = g_io_exp_status;
		spdk_bdev_io_complete(bdev_io, io_status);
		num_completed++;
	}

	return num_completed;
}

static struct spdk_io_channel *
bdev_ut_get_io_channel(void *ctx)
{
	return spdk_get_io_channel(&g_bdev_ut_io_device);
}

static bool g_io_types_supported[SPDK_BDEV_NUM_IO_TYPES] = {
	[SPDK_BDEV_IO_TYPE_READ]		= true,
	[SPDK_BDEV_IO_TYPE_WRITE]		= true,
	[SPDK_BDEV_IO_TYPE_COMPARE]		= true,
	[SPDK_BDEV_IO_TYPE_UNMAP]		= true,
	[SPDK_BDEV_IO_TYPE_FLUSH]		= true,
	[SPDK_BDEV_IO_TYPE_RESET]		= true,
	[SPDK_BDEV_IO_TYPE_NVME_ADMIN]		= true,
	[SPDK_BDEV_IO_TYPE_NVME_IO]		= true,
	[SPDK_BDEV_IO_TYPE_NVME_IO_MD]		= true,
	[SPDK_BDEV_IO_TYPE_WRITE_ZEROES]	= true,
	[SPDK_BDEV_IO_TYPE_ZCOPY]		= true,
	[SPDK_BDEV_IO_TYPE_ABORT]		= true,
	[SPDK_BDEV_IO_TYPE_SEEK_HOLE]		= true,
	[SPDK_BDEV_IO_TYPE_SEEK_DATA]		= true,
	[SPDK_BDEV_IO_TYPE_COPY]		= true,
};

static void
ut_enable_io_type(enum spdk_bdev_io_type io_type, bool enable)
{
	g_io_types_supported[io_type] = enable;
}

static bool
stub_io_type_supported(void *_bdev, enum spdk_bdev_io_type io_type)
{
	return g_io_types_supported[io_type];
}

static struct spdk_bdev_fn_table fn_table = {
	.destruct = stub_destruct,
	.submit_request = stub_submit_request,
	.get_io_channel = bdev_ut_get_io_channel,
	.io_type_supported = stub_io_type_supported,
};

static int
bdev_ut_create_ch(void *io_device, void *ctx_buf)
{
	struct bdev_ut_channel *ch = ctx_buf;

	CU_ASSERT(g_bdev_ut_channel == NULL);
	g_bdev_ut_channel = ch;

	TAILQ_INIT(&ch->outstanding_io);
	ch->outstanding_io_count = 0;
	TAILQ_INIT(&ch->expected_io);
	return 0;
}

static void
bdev_ut_destroy_ch(void *io_device, void *ctx_buf)
{
	CU_ASSERT(g_bdev_ut_channel != NULL);
	g_bdev_ut_channel = NULL;
}

struct spdk_bdev_module bdev_ut_if;

static int
bdev_ut_module_init(void)
{
	spdk_io_device_register(&g_bdev_ut_io_device, bdev_ut_create_ch, bdev_ut_destroy_ch,
				sizeof(struct bdev_ut_channel), NULL);
	spdk_bdev_module_init_done(&bdev_ut_if);
	return 0;
}

static void
bdev_ut_module_fini(void)
{
	spdk_io_device_unregister(&g_bdev_ut_io_device, NULL);
}

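/* bdev_ut_if sets async_init, so bdev-layer startup does not finish until
 * bdev_ut_module_init() explicitly calls spdk_bdev_module_init_done().
 */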
struct spdk_bdev_module bdev_ut_if = {
	.name = "bdev_ut",
	.module_init = bdev_ut_module_init,
	.module_fini = bdev_ut_module_fini,
	.async_init = true,
};

static void vbdev_ut_examine_config(struct spdk_bdev *bdev);
static void vbdev_ut_examine_disk(struct spdk_bdev *bdev);

static int
vbdev_ut_module_init(void)
{
	return 0;
}

static void
vbdev_ut_module_fini(void)
{
}

struct spdk_bdev_module vbdev_ut_if = {
	.name = "vbdev_ut",
	.module_init = vbdev_ut_module_init,
	.module_fini = vbdev_ut_module_fini,
	.examine_config = vbdev_ut_examine_config,
	.examine_disk = vbdev_ut_examine_disk,
};

SPDK_BDEV_MODULE_REGISTER(bdev_ut, &bdev_ut_if)
SPDK_BDEV_MODULE_REGISTER(vbdev_ut, &vbdev_ut_if)

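/* Optional per-bdev examine hooks: a test can hang a ut_examine_ctx off
 * bdev->ctxt to count (and intercept) the examine_config/examine_disk calls
 * made by the vbdev_ut module during bdev registration.
 */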
struct ut_examine_ctx {
	void (*examine_config)(struct spdk_bdev *bdev);
	void (*examine_disk)(struct spdk_bdev *bdev);
	uint32_t examine_config_count;
	uint32_t examine_disk_count;
};

static void
vbdev_ut_examine_config(struct spdk_bdev *bdev)
{
	struct ut_examine_ctx *ctx = bdev->ctxt;

	if (ctx != NULL) {
		ctx->examine_config_count++;
		if (ctx->examine_config != NULL) {
			ctx->examine_config(bdev);
		}
	}

	spdk_bdev_module_examine_done(&vbdev_ut_if);
}

static void
vbdev_ut_examine_disk(struct spdk_bdev *bdev)
{
	struct ut_examine_ctx *ctx = bdev->ctxt;

	if (ctx != NULL) {
		ctx->examine_disk_count++;
		if (ctx->examine_disk != NULL) {
			ctx->examine_disk(bdev);
		}
	}

	spdk_bdev_module_examine_done(&vbdev_ut_if);
}

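/* Register a minimal 1024-block x 512-byte stub bdev backed by fn_table.
 * poll_threads() lets the asynchronous registration (including the examine
 * callbacks) finish before the test proceeds.
 */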
static struct spdk_bdev *
allocate_bdev_ctx(char *name, void *ctx)
{
	struct spdk_bdev *bdev;
	int rc;

	bdev = calloc(1, sizeof(*bdev));
	SPDK_CU_ASSERT_FATAL(bdev != NULL);

	bdev->ctxt = ctx;
	bdev->name = name;
	bdev->fn_table = &fn_table;
	bdev->module = &bdev_ut_if;
	bdev->blockcnt = 1024;
	bdev->blocklen = 512;

	spdk_uuid_generate(&bdev->uuid);

	rc = spdk_bdev_register(bdev);
	poll_threads();
	CU_ASSERT(rc == 0);

	return bdev;
}

static struct spdk_bdev *
allocate_bdev(char *name)
{
	return allocate_bdev_ctx(name, NULL);
}

static struct spdk_bdev *
allocate_vbdev(char *name)
{
	struct spdk_bdev *bdev;
	int rc;

	bdev = calloc(1, sizeof(*bdev));
	SPDK_CU_ASSERT_FATAL(bdev != NULL);

	bdev->name = name;
	bdev->fn_table = &fn_table;
	bdev->module = &vbdev_ut_if;

	rc = spdk_bdev_register(bdev);
	poll_threads();
	CU_ASSERT(rc == 0);

	return bdev;
}

static void
free_bdev(struct spdk_bdev *bdev)
{
	spdk_bdev_unregister(bdev, NULL, NULL);
	poll_threads();
	memset(bdev, 0xFF, sizeof(*bdev));
	free(bdev);
}

static void
free_vbdev(struct spdk_bdev *bdev)
{
	spdk_bdev_unregister(bdev, NULL, NULL);
	poll_threads();
	memset(bdev, 0xFF, sizeof(*bdev));
	free(bdev);
}

static void
get_device_stat_cb(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat, void *cb_arg, int rc)
{
	const char *bdev_name;

	CU_ASSERT(bdev != NULL);
	CU_ASSERT(rc == 0);
	bdev_name = spdk_bdev_get_name(bdev);
	CU_ASSERT_STRING_EQUAL(bdev_name, "bdev0");

	free(stat);

	*(bool *)cb_arg = true;
}

static void
bdev_unregister_cb(void *cb_arg, int rc)
{
	g_unregister_arg = cb_arg;
	g_unregister_rc = rc;
}

static void
bdev_ut_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
}

static void
bdev_open_cb1(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx;

	g_event_type1 = type;
	if (SPDK_BDEV_EVENT_REMOVE == type) {
		spdk_bdev_close(desc);
	}
}

static void
bdev_open_cb2(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_bdev_desc *desc = *(struct spdk_bdev_desc **)event_ctx;

	g_event_type2 = type;
	if (SPDK_BDEV_EVENT_REMOVE == type) {
		spdk_bdev_close(desc);
	}
}

static void
bdev_open_cb3(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	g_event_type3 = type;
}

static void
bdev_open_cb4(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	g_event_type4 = type;
}

static void
bdev_seek_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	g_seek_offset = spdk_bdev_io_get_seek_offset(bdev_io);
	spdk_bdev_free_io(bdev_io);
}

static void
get_device_stat_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_io_stat *stat;
	bool done;

	bdev = allocate_bdev("bdev0");
	stat = calloc(1, sizeof(struct spdk_bdev_io_stat));
	if (stat == NULL) {
		free_bdev(bdev);
		return;
	}

	done = false;
	spdk_bdev_get_device_stat(bdev, stat, get_device_stat_cb, &done);
	while (!done) { poll_threads(); }

	free_bdev(bdev);
}

static void
open_write_test(void)
{
	struct spdk_bdev *bdev[9];
	struct spdk_bdev_desc *desc[9] = {};
	int rc;

	/*
	 * Create a tree of bdevs to test various open w/ write cases.
	 *
	 * bdev0 through bdev3 are physical block devices, such as NVMe
	 * namespaces or Ceph block devices.
	 *
	 * bdev4 is a virtual bdev with multiple base bdevs.  This models
	 * caching or RAID use cases.
	 *
	 * bdev5 and bdev6 are virtual bdevs built on the same base bdev
	 * (bdev2). This models partitioning or logical volume use cases.
	 *
	 * bdev7 is a virtual bdev with multiple base bdevs. One of base bdevs
	 * (bdev2) is shared with other virtual bdevs: bdev5 and bdev6. This
	 * models caching, RAID, partitioning or logical volumes use cases.
	 *
	 * bdev8 is a virtual bdev with multiple base bdevs, but these
	 * base bdevs are themselves virtual bdevs.
	 *
	 *                bdev8
	 *                  |
	 *            +----------+
	 *            |          |
	 *          bdev4      bdev5   bdev6   bdev7
	 *            |          |       |       |
	 *        +---+---+      +---+   +   +---+---+
	 *        |       |           \  |  /         \
	 *      bdev0   bdev1          bdev2         bdev3
	 */

	bdev[0] = allocate_bdev("bdev0");
	rc = spdk_bdev_module_claim_bdev(bdev[0], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[1] = allocate_bdev("bdev1");
	rc = spdk_bdev_module_claim_bdev(bdev[1], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[2] = allocate_bdev("bdev2");
	rc = spdk_bdev_module_claim_bdev(bdev[2], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[3] = allocate_bdev("bdev3");
	rc = spdk_bdev_module_claim_bdev(bdev[3], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[4] = allocate_vbdev("bdev4");
	rc = spdk_bdev_module_claim_bdev(bdev[4], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[5] = allocate_vbdev("bdev5");
	rc = spdk_bdev_module_claim_bdev(bdev[5], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[6] = allocate_vbdev("bdev6");

	bdev[7] = allocate_vbdev("bdev7");

	bdev[8] = allocate_vbdev("bdev8");

	/* Open bdev0 read-only.  This should succeed. */
	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc[0]);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc[0] != NULL);
	CU_ASSERT(bdev[0] == spdk_bdev_desc_get_bdev(desc[0]));
	spdk_bdev_close(desc[0]);

	/*
	 * Open bdev1 read/write.  This should fail since bdev1 has been claimed
	 * by a vbdev module.
	 */
	rc = spdk_bdev_open_ext("bdev1", true, bdev_ut_event_cb, NULL, &desc[1]);
	CU_ASSERT(rc == -EPERM);

	/*
	 * Open bdev4 read/write.  This should fail since bdev4 has been claimed
	 * by a vbdev module.
	 */
	rc = spdk_bdev_open_ext("bdev4", true, bdev_ut_event_cb, NULL, &desc[4]);
	CU_ASSERT(rc == -EPERM);

	/* Open bdev4 read-only.  This should succeed. */
	rc = spdk_bdev_open_ext("bdev4", false, bdev_ut_event_cb, NULL, &desc[4]);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc[4] != NULL);
	CU_ASSERT(bdev[4] == spdk_bdev_desc_get_bdev(desc[4]));
	spdk_bdev_close(desc[4]);

	/*
	 * Open bdev8 read/write.  This should succeed since it is a leaf
	 * bdev.
	 */
	rc = spdk_bdev_open_ext("bdev8", true, bdev_ut_event_cb, NULL, &desc[8]);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc[8] != NULL);
	CU_ASSERT(bdev[8] == spdk_bdev_desc_get_bdev(desc[8]));
	spdk_bdev_close(desc[8]);

	/*
	 * Open bdev5 read/write.  This should fail since bdev5 has been claimed
	 * by a vbdev module.
	 */
	rc = spdk_bdev_open_ext("bdev5", true, bdev_ut_event_cb, NULL, &desc[5]);
	CU_ASSERT(rc == -EPERM);

	/* Open bdev5 read-only.  This should succeed. */
	rc = spdk_bdev_open_ext("bdev5", false, bdev_ut_event_cb, NULL, &desc[5]);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc[5] != NULL);
	CU_ASSERT(bdev[5] == spdk_bdev_desc_get_bdev(desc[5]));
	spdk_bdev_close(desc[5]);

	free_vbdev(bdev[8]);

	free_vbdev(bdev[5]);
	free_vbdev(bdev[6]);
	free_vbdev(bdev[7]);

	free_vbdev(bdev[4]);

	free_bdev(bdev[0]);
	free_bdev(bdev[1]);
	free_bdev(bdev[2]);
	free_bdev(bdev[3]);
}

static void
claim_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc, *open_desc;
	int rc;
	uint32_t count;

	/*
	 * A vbdev that uses a read-only bdev may need it to remain read-only.
	 * To do so, it opens the bdev read-only, then claims it without
	 * passing a spdk_bdev_desc.
	 */
	bdev = allocate_bdev("bdev0");
	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
	CU_ASSERT(rc == 0);
	CU_ASSERT(desc->write == false);

	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);
	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
	CU_ASSERT(bdev->internal.claim.v1.module == &bdev_ut_if);

	/* There should be only one open descriptor and it should still be ro */
	count = 0;
	TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
		CU_ASSERT(open_desc == desc);
		CU_ASSERT(!open_desc->write);
		count++;
	}
	CU_ASSERT(count == 1);

	/* A read-only bdev is upgraded to read-write if desc is passed. */
	spdk_bdev_module_release_bdev(bdev);
	rc = spdk_bdev_module_claim_bdev(bdev, desc, &bdev_ut_if);
	CU_ASSERT(rc == 0);
	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
	CU_ASSERT(bdev->internal.claim.v1.module == &bdev_ut_if);

	/* There should be only one open descriptor and it should be rw */
	count = 0;
	TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
		CU_ASSERT(open_desc == desc);
		CU_ASSERT(open_desc->write);
		count++;
	}
	CU_ASSERT(count == 1);

	spdk_bdev_close(desc);
	free_bdev(bdev);
}

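/* bdev_bytes_to_blocks() converts a byte offset/length pair into block
 * units: with blocklen = 512, offset 512 maps to block 1 and length 1024 to
 * 2 blocks. Any offset or length that is not a multiple of blocklen must be
 * rejected, including for non-power-of-two block sizes.
 */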
static void
bytes_to_blocks_test(void)
{
	struct spdk_bdev bdev;
	uint64_t offset_blocks, num_blocks;

	memset(&bdev, 0, sizeof(bdev));

	bdev.blocklen = 512;

	/* All parameters valid */
	offset_blocks = 0;
	num_blocks = 0;
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 512, &offset_blocks, 1024, &num_blocks) == 0);
	CU_ASSERT(offset_blocks == 1);
	CU_ASSERT(num_blocks == 2);

	/* Offset not a block multiple */
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 3, &offset_blocks, 512, &num_blocks) != 0);

	/* Length not a block multiple */
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 512, &offset_blocks, 3, &num_blocks) != 0);

	/* Repeat with a blocklen that is not a power of two */
	bdev.blocklen = 100;
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 100, &offset_blocks, 200, &num_blocks) == 0);
	CU_ASSERT(offset_blocks == 1);
	CU_ASSERT(num_blocks == 2);

	/* Offset not a block multiple */
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 3, &offset_blocks, 100, &num_blocks) != 0);

	/* Length not a block multiple */
	CU_ASSERT(bdev_bytes_to_blocks(&bdev, 100, &offset_blocks, 3, &num_blocks) != 0);
}

static void
num_blocks_test(void)
{
	struct spdk_bdev bdev;
	struct spdk_bdev_desc *desc = NULL;
	int rc;

	memset(&bdev, 0, sizeof(bdev));
	bdev.name = "num_blocks";
	bdev.fn_table = &fn_table;
	bdev.module = &bdev_ut_if;
	spdk_bdev_register(&bdev);
	poll_threads();
	spdk_bdev_notify_blockcnt_change(&bdev, 50);

	/* Growing block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 70) == 0);
	/* Shrinking block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 30) == 0);

	rc = spdk_bdev_open_ext("num_blocks", false, bdev_open_cb1, &desc, &desc);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(&bdev == spdk_bdev_desc_get_bdev(desc));

	/* Growing block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 80) == 0);
	/* Shrinking block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 20) != 0);

	g_event_type1 = 0xFF;
	/* Growing block number */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 90) == 0);

	poll_threads();
	CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_RESIZE);

	g_event_type1 = 0xFF;
	/* Growing block number and closing */
	CU_ASSERT(spdk_bdev_notify_blockcnt_change(&bdev, 100) == 0);

	spdk_bdev_close(desc);
	spdk_bdev_unregister(&bdev, NULL, NULL);

	poll_threads();

	/* Callback is not called for closed device */
	CU_ASSERT_EQUAL(g_event_type1, 0xFF);
}

static void
io_valid_test(void)
{
	struct spdk_bdev bdev;

	memset(&bdev, 0, sizeof(bdev));

	bdev.blocklen = 512;
	spdk_spin_init(&bdev.internal.spinlock);

	spdk_bdev_notify_blockcnt_change(&bdev, 100);

	/* All parameters valid */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 1, 2) == true);

	/* Last valid block */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 99, 1) == true);

	/* Offset past end of bdev */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 100, 1) == false);

	/* Offset + length past end of bdev */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 99, 2) == false);

	/* Offset near end of uint64_t range (2^64 - 1) */
	CU_ASSERT(bdev_io_valid_blocks(&bdev, 18446744073709551615ULL, 1) == false);

	spdk_spin_destroy(&bdev.internal.spinlock);
}

static void
alias_add_del_test(void)
{
	struct spdk_bdev *bdev[3];
	int rc;

	/* Create and register bdevs */
	bdev[0] = allocate_bdev("bdev0");
	SPDK_CU_ASSERT_FATAL(bdev[0] != 0);

	bdev[1] = allocate_bdev("bdev1");
	SPDK_CU_ASSERT_FATAL(bdev[1] != 0);

	bdev[2] = allocate_bdev("bdev2");
	SPDK_CU_ASSERT_FATAL(bdev[2] != 0);

	poll_threads();

	/*
	 * Try adding an alias identical to the bdev's name.
	 * Since the alias matches the name, it cannot be added to the aliases list.
	 */
	rc = spdk_bdev_alias_add(bdev[0], bdev[0]->name);
	CU_ASSERT(rc == -EEXIST);

	/*
	 * Trying to add an empty (NULL) alias
	 * should fail.
	 */
	rc = spdk_bdev_alias_add(bdev[0], NULL);
	CU_ASSERT(rc == -EINVAL);

	/* Try adding the same alias to two different registered bdevs */

	/* The alias is used for the first time, so this should pass */
	rc = spdk_bdev_alias_add(bdev[0], "proper alias 0");
	CU_ASSERT(rc == 0);

	/* The alias was already added to another bdev, so this should fail */
	rc = spdk_bdev_alias_add(bdev[1], "proper alias 0");
	CU_ASSERT(rc == -EEXIST);

	/* The alias is used for the first time, so this should pass */
	rc = spdk_bdev_alias_add(bdev[1], "proper alias 1");
	CU_ASSERT(rc == 0);

	/* Try removing aliases from registered bdevs */

	/* The alias is not on the bdev's alias list, so this should fail */
	rc = spdk_bdev_alias_del(bdev[0], "not existing");
	CU_ASSERT(rc == -ENOENT);

	/* The alias is present on the bdev's alias list, so this should pass */
	rc = spdk_bdev_alias_del(bdev[0], "proper alias 0");
	CU_ASSERT(rc == 0);

	/* The alias is present on the bdev's alias list, so this should pass */
	rc = spdk_bdev_alias_del(bdev[1], "proper alias 1");
	CU_ASSERT(rc == 0);

	/* Trying to remove the name instead of an alias should fail; the name cannot be changed or removed */
	rc = spdk_bdev_alias_del(bdev[0], bdev[0]->name);
	CU_ASSERT(rc != 0);

	/* Delete all aliases from an empty alias list */
	spdk_bdev_alias_del_all(bdev[2]);
	SPDK_CU_ASSERT_FATAL(TAILQ_EMPTY(&bdev[2]->aliases));

	/* Delete all aliases from a non-empty alias list */
	rc = spdk_bdev_alias_add(bdev[2], "alias0");
	CU_ASSERT(rc == 0);
	rc = spdk_bdev_alias_add(bdev[2], "alias1");
	CU_ASSERT(rc == 0);
	spdk_bdev_alias_del_all(bdev[2]);
	CU_ASSERT(TAILQ_EMPTY(&bdev[2]->aliases));

	/* Unregister and free bdevs */
	spdk_bdev_unregister(bdev[0], NULL, NULL);
	spdk_bdev_unregister(bdev[1], NULL, NULL);
	spdk_bdev_unregister(bdev[2], NULL, NULL);

	poll_threads();

	free(bdev[0]);
	free(bdev[1]);
	free(bdev[2]);
}

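/* Generic completion callback. Zcopy-start I/Os are kept in g_zcopy_bdev_io
 * because the test still needs them for the commit/release phase; all other
 * I/Os are freed immediately.
 */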
static void
io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	g_io_done = true;
	g_io_status = bdev_io->internal.status;
	if ((bdev_io->type == SPDK_BDEV_IO_TYPE_ZCOPY) &&
	    (bdev_io->u.bdev.zcopy.start)) {
		g_zcopy_bdev_io = bdev_io;
	} else {
		spdk_bdev_free_io(bdev_io);
		g_zcopy_bdev_io = NULL;
	}
}

static void
bdev_init_cb(void *arg, int rc)
{
	CU_ASSERT(rc == 0);
}

static void
bdev_fini_cb(void *arg)
{
}

static void
ut_init_bdev(struct spdk_bdev_opts *opts)
{
	int rc;

	if (opts != NULL) {
		rc = spdk_bdev_set_opts(opts);
		CU_ASSERT(rc == 0);
	}
	rc = spdk_iobuf_initialize();
	CU_ASSERT(rc == 0);
	spdk_bdev_initialize(bdev_init_cb, NULL);
	poll_threads();
}

static void
ut_fini_bdev(void)
{
	spdk_bdev_finish(bdev_fini_cb, NULL);
	spdk_iobuf_finish(bdev_fini_cb, NULL);
	poll_threads();
}

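/* Helper for exercising spdk_bdev_queue_io_wait(): when a submission fails
 * with -ENOMEM, the test registers one of these entries and io_wait_cb()
 * retries the read once a bdev_io is freed back to the pool.
 */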
struct bdev_ut_io_wait_entry {
	struct spdk_bdev_io_wait_entry	entry;
	struct spdk_io_channel		*io_ch;
	struct spdk_bdev_desc		*desc;
	bool				submitted;
};

static void
io_wait_cb(void *arg)
{
	struct bdev_ut_io_wait_entry *entry = arg;
	int rc;

	rc = spdk_bdev_read_blocks(entry->desc, entry->io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	entry->submitted = true;
}

static void
bdev_io_types_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc = NULL;
	struct spdk_io_channel *io_ch;
	struct spdk_bdev_opts bdev_opts = {};
	int rc;

	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
	bdev_opts.bdev_io_pool_size = 4;
	bdev_opts.bdev_io_cache_size = 2;
	ut_init_bdev(&bdev_opts);

	bdev = allocate_bdev("bdev0");

	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
	CU_ASSERT(rc == 0);
	poll_threads();
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
	io_ch = spdk_bdev_get_io_channel(desc);
	CU_ASSERT(io_ch != NULL);

	/* WRITE and WRITE ZEROES are not supported */
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, false);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE, false);
	rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 0, 128, io_done, NULL);
	CU_ASSERT(rc == -ENOTSUP);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, true);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE, true);

	/* COPY is not supported */
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_COPY, false);
	rc = spdk_bdev_copy_blocks(desc, io_ch, 128, 0, 128, io_done, NULL);
	CU_ASSERT(rc == -ENOTSUP);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_COPY, true);

	/* NVME_IO, NVME_IO_MD and NVME_ADMIN are not supported */
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_IO, false);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_IO_MD, false);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_ADMIN, false);
	rc = spdk_bdev_nvme_io_passthru(desc, io_ch, NULL, NULL, 0, NULL, NULL);
	CU_ASSERT(rc == -ENOTSUP);
	rc = spdk_bdev_nvme_io_passthru_md(desc, io_ch, NULL, NULL, 0, NULL, 0, NULL, NULL);
	CU_ASSERT(rc == -ENOTSUP);
	rc = spdk_bdev_nvme_admin_passthru(desc, io_ch, NULL, NULL, 0, NULL, NULL);
	CU_ASSERT(rc == -ENOTSUP);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_IO, true);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_IO_MD, true);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_NVME_ADMIN, true);

	spdk_put_io_channel(io_ch);
	spdk_bdev_close(desc);
	free_bdev(bdev);
	ut_fini_bdev();
}

static void
bdev_io_wait_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc = NULL;
	struct spdk_io_channel *io_ch;
	struct spdk_bdev_opts bdev_opts = {};
	struct bdev_ut_io_wait_entry io_wait_entry;
	struct bdev_ut_io_wait_entry io_wait_entry2;
	int rc;

	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
	bdev_opts.bdev_io_pool_size = 4;
	bdev_opts.bdev_io_cache_size = 2;
	ut_init_bdev(&bdev_opts);

	bdev = allocate_bdev("bdev0");

	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
	CU_ASSERT(rc == 0);
	poll_threads();
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
	io_ch = spdk_bdev_get_io_channel(desc);
	CU_ASSERT(io_ch != NULL);

	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);

	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
	CU_ASSERT(rc == -ENOMEM);

	io_wait_entry.entry.bdev = bdev;
	io_wait_entry.entry.cb_fn = io_wait_cb;
	io_wait_entry.entry.cb_arg = &io_wait_entry;
	io_wait_entry.io_ch = io_ch;
	io_wait_entry.desc = desc;
	io_wait_entry.submitted = false;
	/* Cannot use the same io_wait_entry for two different calls. */
	memcpy(&io_wait_entry2, &io_wait_entry, sizeof(io_wait_entry));
	io_wait_entry2.entry.cb_arg = &io_wait_entry2;

	/* Queue two I/O waits. */
	rc = spdk_bdev_queue_io_wait(bdev, io_ch, &io_wait_entry.entry);
	CU_ASSERT(rc == 0);
	CU_ASSERT(io_wait_entry.submitted == false);
	rc = spdk_bdev_queue_io_wait(bdev, io_ch, &io_wait_entry2.entry);
	CU_ASSERT(rc == 0);
	CU_ASSERT(io_wait_entry2.submitted == false);

	stub_complete_io(1);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
	CU_ASSERT(io_wait_entry.submitted == true);
	CU_ASSERT(io_wait_entry2.submitted == false);

	stub_complete_io(1);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
	CU_ASSERT(io_wait_entry2.submitted == true);

	stub_complete_io(4);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	spdk_put_io_channel(io_ch);
	spdk_bdev_close(desc);
	free_bdev(bdev);
	ut_fini_bdev();
}

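/* bdev_io_should_split() is consulted on every submission. An I/O must be
 * split when it crosses optimal_io_boundary (if split_on_optimal_io_boundary
 * is set), exceeds max_segment_size/max_num_segments, or straddles a write
 * unit while split_on_write_unit is set. RESET is never LBA-based, so it is
 * never split.
 */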
static void
bdev_io_spans_split_test(void)
{
	struct spdk_bdev bdev;
	struct spdk_bdev_io bdev_io;
	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV];

	memset(&bdev, 0, sizeof(bdev));
	bdev_io.u.bdev.iovs = iov;

	bdev_io.type = SPDK_BDEV_IO_TYPE_READ;
	bdev.optimal_io_boundary = 0;
	bdev.max_segment_size = 0;
	bdev.max_num_segments = 0;
	bdev_io.bdev = &bdev;

	/* bdev has no optimal_io_boundary and max_size set - so this should return false. */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev.split_on_optimal_io_boundary = true;
	bdev.optimal_io_boundary = 32;
	bdev_io.type = SPDK_BDEV_IO_TYPE_RESET;

	/* RESETs are not based on LBAs - so this should return false. */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev_io.type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io.u.bdev.offset_blocks = 0;
	bdev_io.u.bdev.num_blocks = 32;

	/* This I/O runs right up to, but does not cross, the boundary - so this should return false. */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev_io.u.bdev.num_blocks = 33;

	/* This I/O spans a boundary. */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);

	bdev_io.u.bdev.num_blocks = 32;
	bdev.max_segment_size = 512 * 32;
	bdev.max_num_segments = 1;
	bdev_io.u.bdev.iovcnt = 1;
	iov[0].iov_len = 512;

	/* Does not cross and exceed max_size or max_segs */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev.split_on_optimal_io_boundary = false;
	bdev.max_segment_size = 512;
	bdev.max_num_segments = 1;
	bdev_io.u.bdev.iovcnt = 2;

	/* Exceed max_segs */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);

	bdev.max_num_segments = 2;
	iov[0].iov_len = 513;
	iov[1].iov_len = 512;

	/* Exceed max_sizes */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);

	bdev.max_segment_size = 0;
	bdev.write_unit_size = 32;
	bdev.split_on_write_unit = true;
	bdev_io.type = SPDK_BDEV_IO_TYPE_WRITE;

	/* This I/O is one write unit */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == false);

	bdev_io.u.bdev.num_blocks = 32 * 2;

	/* This I/O is more than one write unit */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);

	bdev_io.u.bdev.offset_blocks = 1;
	bdev_io.u.bdev.num_blocks = 32;

	/* This I/O is not aligned to write unit size */
	CU_ASSERT(bdev_io_should_split(&bdev_io) == true);
}

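/* End-to-end splitting test: submits real I/O through a registered bdev and
 * checks, via the expected_io queue, exactly how a parent I/O is carved into
 * children at optimal_io_boundary and at the SPDK_BDEV_IO_NUM_CHILD_IOV
 * iovec limit.
 */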
static void
bdev_io_boundary_split_test(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc = NULL;
	struct spdk_io_channel *io_ch;
	struct spdk_bdev_opts bdev_opts = {};
	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 2];
	struct ut_expected_io *expected_io;
	void *md_buf = (void *)0xFF000000;
	uint64_t i;
	int rc;

	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
	bdev_opts.bdev_io_pool_size = 512;
	bdev_opts.bdev_io_cache_size = 64;
	ut_init_bdev(&bdev_opts);

	bdev = allocate_bdev("bdev0");

	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
	io_ch = spdk_bdev_get_io_channel(desc);
	CU_ASSERT(io_ch != NULL);

	bdev->optimal_io_boundary = 16;
	bdev->split_on_optimal_io_boundary = false;

	g_io_done = false;

	/* First test that the I/O does not get split if split_on_optimal_io_boundary == false. */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 8, 1);
	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 8 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	bdev->split_on_optimal_io_boundary = true;
	bdev->md_interleave = false;
	bdev->md_len = 8;

	/* Now test that a single-vector command is split correctly.
	 * Offset 14, length 8, payload 0xF000
	 *  Child - Offset 14, length 2, payload 0xF000
	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
	 *
	 * Set up the expected values before calling spdk_bdev_read_blocks
	 */
	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
	expected_io->md_buf = md_buf;
	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
	expected_io->md_buf = md_buf + 2 * 8;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	/* spdk_bdev_read_blocks will submit the first child immediately. */
	rc = spdk_bdev_read_blocks_with_md(desc, io_ch, (void *)0xF000, md_buf,
					   14, 8, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Now set up a more complex, multi-vector command that needs to be split,
	 *  including splitting iovecs.
	 */
	iov[0].iov_base = (void *)0x10000;
	iov[0].iov_len = 512;
	iov[1].iov_base = (void *)0x20000;
	iov[1].iov_len = 20 * 512;
	iov[2].iov_base = (void *)0x30000;
	iov[2].iov_len = 11 * 512;

	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 2);
	expected_io->md_buf = md_buf;
	ut_expected_io_set_iov(expected_io, 0, (void *)0x10000, 512);
	ut_expected_io_set_iov(expected_io, 1, (void *)0x20000, 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 16, 1);
	expected_io->md_buf = md_buf + 2 * 8;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0x20000 + 512), 16 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 2);
	expected_io->md_buf = md_buf + 18 * 8;
	ut_expected_io_set_iov(expected_io, 0, (void *)(0x20000 + 17 * 512), 3 * 512);
	ut_expected_io_set_iov(expected_io, 1, (void *)0x30000, 11 * 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_writev_blocks_with_md(desc, io_ch, iov, 3, md_buf,
					     14, 32, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
	stub_complete_io(3);
	CU_ASSERT(g_io_done == true);

	/* Test multi vector command that needs to be split by strip and then needs to be
	 * split further due to the capacity of child iovs.
	 */
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV * 2; i++) {
		iov[i].iov_base = (void *)((i + 1) * 0x10000);
		iov[i].iov_len = 512;
	}

	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, SPDK_BDEV_IO_NUM_CHILD_IOV,
					   SPDK_BDEV_IO_NUM_CHILD_IOV);
	expected_io->md_buf = md_buf;
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
		ut_expected_io_set_iov(expected_io, i, (void *)((i + 1) * 0x10000), 512);
	}
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV,
					   SPDK_BDEV_IO_NUM_CHILD_IOV, SPDK_BDEV_IO_NUM_CHILD_IOV);
	expected_io->md_buf = md_buf + SPDK_BDEV_IO_NUM_CHILD_IOV * 8;
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
		ut_expected_io_set_iov(expected_io, i,
				       (void *)((i + 1 + SPDK_BDEV_IO_NUM_CHILD_IOV) * 0x10000), 512);
	}
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_readv_blocks_with_md(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, md_buf,
					    0, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Test multi vector command that needs to be split by strip and then needs to be
	 * split further due to the capacity of child iovs. In this case, the length of
	 * the rest of iovec array with an I/O boundary is the multiple of block size.
	 */

	/* Fill iovec array for exactly one boundary. The iovec cnt for this boundary
	 * is SPDK_BDEV_IO_NUM_CHILD_IOV + 1, which exceeds the capacity of child iovs.
	 */
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
		iov[i].iov_base = (void *)((i + 1) * 0x10000);
		iov[i].iov_len = 512;
	}
	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
		iov[i].iov_base = (void *)((i + 1) * 0x10000);
		iov[i].iov_len = 256;
	}
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_len = 512;

	/* Add an extra iovec to trigger split */
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_len = 512;

	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
	g_io_done = false;
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0,
					   SPDK_BDEV_IO_NUM_CHILD_IOV - 1, SPDK_BDEV_IO_NUM_CHILD_IOV);
	expected_io->md_buf = md_buf;
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
		ut_expected_io_set_iov(expected_io, i,
				       (void *)((i + 1) * 0x10000), 512);
	}
	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
		ut_expected_io_set_iov(expected_io, i,
				       (void *)((i + 1) * 0x10000), 256);
	}
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV - 1,
					   1, 1);
	expected_io->md_buf = md_buf + (SPDK_BDEV_IO_NUM_CHILD_IOV - 1) * 8;
	ut_expected_io_set_iov(expected_io, 0,
			       (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000), 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV,
					   1, 1);
	expected_io->md_buf = md_buf + SPDK_BDEV_IO_NUM_CHILD_IOV * 8;
	ut_expected_io_set_iov(expected_io, 0,
			       (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000), 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_readv_blocks_with_md(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV + 2, md_buf,
					    0, SPDK_BDEV_IO_NUM_CHILD_IOV + 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Test a multi-vector command that needs to be split by strip and then
	 * needs to be split further due to the capacity of child iovs. The child
	 * request offset should be rewound to the last aligned offset so that the
	 * request succeeds without error.
	 */
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 1; i++) {
		iov[i].iov_base = (void *)((i + 1) * 0x10000);
		iov[i].iov_len = 512;
	}
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_base = (void *)(SPDK_BDEV_IO_NUM_CHILD_IOV * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_len = 256;

	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_len = 256;

	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000);
	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_len = 512;

	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
	g_io_done = false;
	g_io_status = 0;
	/* The first expected I/O runs from offset 0 up to SPDK_BDEV_IO_NUM_CHILD_IOV - 1 */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0,
					   SPDK_BDEV_IO_NUM_CHILD_IOV - 1, SPDK_BDEV_IO_NUM_CHILD_IOV - 1);
	expected_io->md_buf = md_buf;
	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 1; i++) {
		ut_expected_io_set_iov(expected_io, i,
				       (void *)((i + 1) * 0x10000), 512);
	}
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
	/* The second expected I/O runs from offset SPDK_BDEV_IO_NUM_CHILD_IOV - 1 up to SPDK_BDEV_IO_NUM_CHILD_IOV */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV - 1,
					   1, 2);
	expected_io->md_buf = md_buf + (SPDK_BDEV_IO_NUM_CHILD_IOV - 1) * 8;
	ut_expected_io_set_iov(expected_io, 0,
			       (void *)(SPDK_BDEV_IO_NUM_CHILD_IOV * 0x10000), 256);
	ut_expected_io_set_iov(expected_io, 1,
			       (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000), 256);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
	/* The third expected I/O runs from offset SPDK_BDEV_IO_NUM_CHILD_IOV up to SPDK_BDEV_IO_NUM_CHILD_IOV + 1 */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV,
					   1, 1);
	expected_io->md_buf = md_buf + SPDK_BDEV_IO_NUM_CHILD_IOV * 8;
	ut_expected_io_set_iov(expected_io, 0,
			       (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000), 512);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_readv_blocks_with_md(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, md_buf,
					    0, SPDK_BDEV_IO_NUM_CHILD_IOV + 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
	stub_complete_io(2);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Test a multi-vector command that needs to be split due to the I/O
	 * boundary and the capacity of child iovs. In particular, test the case
	 * where the command is split due to the capacity of child iovs and the
	 * tail address is not aligned with the block size, so it is rewound to
	 * the aligned address.
	 *
	 * The iovecs used in the read request are complex but are based on data
	 * collected from a real issue. We change the base addresses but keep the
	 * lengths so as not to lose the credibility of the test.
	 */
	bdev->optimal_io_boundary = 128;
	g_io_done = false;
	g_io_status = 0;

	for (i = 0; i < 31; i++) {
		iov[i].iov_base = (void *)(0xFEED0000000 + (i << 20));
		iov[i].iov_len = 1024;
	}
	iov[31].iov_base = (void *)0xFEED1F00000;
	iov[31].iov_len = 32768;
	iov[32].iov_base = (void *)0xFEED2000000;
	iov[32].iov_len = 160;
	iov[33].iov_base = (void *)0xFEED2100000;
	iov[33].iov_len = 4096;
	iov[34].iov_base = (void *)0xFEED2200000;
	iov[34].iov_len = 4096;
	iov[35].iov_base = (void *)0xFEED2300000;
	iov[35].iov_len = 4096;
	iov[36].iov_base = (void *)0xFEED2400000;
	iov[36].iov_len = 4096;
	iov[37].iov_base = (void *)0xFEED2500000;
	iov[37].iov_len = 4096;
	iov[38].iov_base = (void *)0xFEED2600000;
	iov[38].iov_len = 4096;
	iov[39].iov_base = (void *)0xFEED2700000;
	iov[39].iov_len = 4096;
	iov[40].iov_base = (void *)0xFEED2800000;
	iov[40].iov_len = 4096;
	iov[41].iov_base = (void *)0xFEED2900000;
	iov[41].iov_len = 4096;
	iov[42].iov_base = (void *)0xFEED2A00000;
	iov[42].iov_len = 4096;
	iov[43].iov_base = (void *)0xFEED2B00000;
	iov[43].iov_len = 12288;
	iov[44].iov_base = (void *)0xFEED2C00000;
	iov[44].iov_len = 8192;
	iov[45].iov_base = (void *)0xFEED2F00000;
	iov[45].iov_len = 4096;
	iov[46].iov_base = (void *)0xFEED3000000;
	iov[46].iov_len = 4096;
	iov[47].iov_base = (void *)0xFEED3100000;
	iov[47].iov_len = 4096;
	iov[48].iov_base = (void *)0xFEED3200000;
	iov[48].iov_len = 24576;
	iov[49].iov_base = (void *)0xFEED3300000;
	iov[49].iov_len = 16384;
	iov[50].iov_base = (void *)0xFEED3400000;
	iov[50].iov_len = 12288;
	iov[51].iov_base = (void *)0xFEED3500000;
	iov[51].iov_len = 4096;
	iov[52].iov_base = (void *)0xFEED3600000;
	iov[52].iov_len = 4096;
	iov[53].iov_base = (void *)0xFEED3700000;
	iov[53].iov_len = 4096;
	iov[54].iov_base = (void *)0xFEED3800000;
	iov[54].iov_len = 28672;
	iov[55].iov_base = (void *)0xFEED3900000;
	iov[55].iov_len = 20480;
	iov[56].iov_base = (void *)0xFEED3A00000;
	iov[56].iov_len = 4096;
	iov[57].iov_base = (void *)0xFEED3B00000;
	iov[57].iov_len = 12288;
	iov[58].iov_base = (void *)0xFEED3C00000;
	iov[58].iov_len = 4096;
	iov[59].iov_base = (void *)0xFEED3D00000;
	iov[59].iov_len = 4096;
	iov[60].iov_base = (void *)0xFEED3E00000;
	iov[60].iov_len = 352;

1697 	/* The 1st child IO must be from iov[0] to iov[31], split by the capacity
1698 	 * of child iovs.
1699 	 */
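	/* Byte accounting for the 1st child: its 32 iov entries only reach
	 * 31 * 1024 + 32768 = 64512 bytes = 126 blocks, short of the 128-block
	 * boundary, which is why the capacity split happens here.
	 */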
1700 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 126, 32);
1701 	expected_io->md_buf = md_buf;
1702 	for (i = 0; i < 32; i++) {
1703 		ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, iov[i].iov_len);
1704 	}
1705 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1706 
1707 	/* The 2nd child IO must be from iov[32] to the first 864 bytes of iov[33]
1708 	 * split by the IO boundary requirement.
1709 	 */
1710 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 126, 2, 2);
1711 	expected_io->md_buf = md_buf + 126 * 8;
1712 	ut_expected_io_set_iov(expected_io, 0, iov[32].iov_base, iov[32].iov_len);
1713 	ut_expected_io_set_iov(expected_io, 1, iov[33].iov_base, 864);
1714 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1715 
1716 	/* The 3rd child IO must be from the remaining 3232 bytes of iov[33] to
1717 	 * the first 864 bytes of iov[46] split by the IO boundary requirement.
1718 	 */
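	/* Byte accounting for the 3rd child: 128 blocks * 512 = 65536 =
	 * 3232 + 9 * 4096 + 12288 + 8192 + 4096 + 864 across iov[33..46].
	 */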
1719 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 128, 128, 14);
1720 	expected_io->md_buf = md_buf + 128 * 8;
1721 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[33].iov_base + 864),
1722 			       iov[33].iov_len - 864);
1723 	ut_expected_io_set_iov(expected_io, 1, iov[34].iov_base, iov[34].iov_len);
1724 	ut_expected_io_set_iov(expected_io, 2, iov[35].iov_base, iov[35].iov_len);
1725 	ut_expected_io_set_iov(expected_io, 3, iov[36].iov_base, iov[36].iov_len);
1726 	ut_expected_io_set_iov(expected_io, 4, iov[37].iov_base, iov[37].iov_len);
1727 	ut_expected_io_set_iov(expected_io, 5, iov[38].iov_base, iov[38].iov_len);
1728 	ut_expected_io_set_iov(expected_io, 6, iov[39].iov_base, iov[39].iov_len);
1729 	ut_expected_io_set_iov(expected_io, 7, iov[40].iov_base, iov[40].iov_len);
1730 	ut_expected_io_set_iov(expected_io, 8, iov[41].iov_base, iov[41].iov_len);
1731 	ut_expected_io_set_iov(expected_io, 9, iov[42].iov_base, iov[42].iov_len);
1732 	ut_expected_io_set_iov(expected_io, 10, iov[43].iov_base, iov[43].iov_len);
1733 	ut_expected_io_set_iov(expected_io, 11, iov[44].iov_base, iov[44].iov_len);
1734 	ut_expected_io_set_iov(expected_io, 12, iov[45].iov_base, iov[45].iov_len);
1735 	ut_expected_io_set_iov(expected_io, 13, iov[46].iov_base, 864);
1736 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1737 
1738 	/* The 4th child IO must be from the remaining 3232 bytes of iov[46] to the
1739 	 * first 864 bytes of iov[52] split by the IO boundary requirement.
1740 	 */
1741 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 256, 128, 7);
1742 	expected_io->md_buf = md_buf + 256 * 8;
1743 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[46].iov_base + 864),
1744 			       iov[46].iov_len - 864);
1745 	ut_expected_io_set_iov(expected_io, 1, iov[47].iov_base, iov[47].iov_len);
1746 	ut_expected_io_set_iov(expected_io, 2, iov[48].iov_base, iov[48].iov_len);
1747 	ut_expected_io_set_iov(expected_io, 3, iov[49].iov_base, iov[49].iov_len);
1748 	ut_expected_io_set_iov(expected_io, 4, iov[50].iov_base, iov[50].iov_len);
1749 	ut_expected_io_set_iov(expected_io, 5, iov[51].iov_base, iov[51].iov_len);
1750 	ut_expected_io_set_iov(expected_io, 6, iov[52].iov_base, 864);
1751 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1752 
1753 	/* The 5th child IO must be from the remaining 3232 bytes of iov[52] to
1754 	 * the first 4960 bytes of iov[57] split by the IO boundary requirement.
1755 	 */
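	/* Byte accounting for the 5th child: 128 blocks * 512 = 65536 =
	 * 3232 + 4096 + 28672 + 20480 + 4096 + 4960, so only the first
	 * 4960 bytes of iov[57] fit in this child.
	 */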
1756 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 384, 128, 6);
1757 	expected_io->md_buf = md_buf + 384 * 8;
1758 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[52].iov_base + 864),
1759 			       iov[52].iov_len - 864);
1760 	ut_expected_io_set_iov(expected_io, 1, iov[53].iov_base, iov[53].iov_len);
1761 	ut_expected_io_set_iov(expected_io, 2, iov[54].iov_base, iov[54].iov_len);
1762 	ut_expected_io_set_iov(expected_io, 3, iov[55].iov_base, iov[55].iov_len);
1763 	ut_expected_io_set_iov(expected_io, 4, iov[56].iov_base, iov[56].iov_len);
1764 	ut_expected_io_set_iov(expected_io, 5, iov[57].iov_base, 4960);
1765 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1766 
1767 	/* The 6th child IO must be from the remaining 7328 bytes of iov[57]
1768 	 * to the first 3936 bytes of iov[59] split by the capacity of child iovs.
1769 	 */
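	/* Worked rewind arithmetic for the 6th child: children 2-5 consume
	 * 2 + 14 + 7 + 6 = 29 of the 32 child iov entries available in this split
	 * round, so only 3 remain here. They hold 7328 + 4096 + 4096 = 15520 bytes,
	 * which is not a multiple of the 512-byte block, so the tail is rewound by
	 * 160 bytes to 15360 bytes (30 blocks), leaving iov[59] at 3936 bytes.
	 */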
1770 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 512, 30, 3);
1771 	expected_io->md_buf = md_buf + 512 * 8;
1772 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[57].iov_base + 4960),
1773 			       iov[57].iov_len - 4960);
1774 	ut_expected_io_set_iov(expected_io, 1, iov[58].iov_base, iov[58].iov_len);
1775 	ut_expected_io_set_iov(expected_io, 2, iov[59].iov_base, 3936);
1776 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1777 
1778 	/* The 7th child IO is from the remaining 160 bytes of iov[59] and iov[60]. */
1779 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 542, 1, 2);
1780 	expected_io->md_buf = md_buf + 542 * 8;
1781 	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[59].iov_base + 3936),
1782 			       iov[59].iov_len - 3936);
1783 	ut_expected_io_set_iov(expected_io, 1, iov[60].iov_base, iov[60].iov_len);
1784 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1785 
1786 	rc = spdk_bdev_readv_blocks_with_md(desc, io_ch, iov, 61, md_buf,
1787 					    0, 543, io_done, NULL);
1788 	CU_ASSERT(rc == 0);
1789 	CU_ASSERT(g_io_done == false);
1790 
1791 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1792 	stub_complete_io(1);
1793 	CU_ASSERT(g_io_done == false);
1794 
1795 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 5);
1796 	stub_complete_io(5);
1797 	CU_ASSERT(g_io_done == false);
1798 
1799 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1800 	stub_complete_io(1);
1801 	CU_ASSERT(g_io_done == true);
1802 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
1803 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
1804 
1805 	/* Test a WRITE_ZEROES that would span an I/O boundary.  WRITE_ZEROES should not be
1806 	 * split, so test that.
1807 	 */
1808 	bdev->optimal_io_boundary = 15;
1809 	g_io_done = false;
1810 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 9, 36, 0);
1811 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1812 
1813 	rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 9, 36, io_done, NULL);
1814 	CU_ASSERT(rc == 0);
1815 	CU_ASSERT(g_io_done == false);
1816 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1817 	stub_complete_io(1);
1818 	CU_ASSERT(g_io_done == true);
1819 
1820 	/* Test an UNMAP.  This should also not be split. */
1821 	bdev->optimal_io_boundary = 16;
1822 	g_io_done = false;
1823 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, 15, 2, 0);
1824 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1825 
1826 	rc = spdk_bdev_unmap_blocks(desc, io_ch, 15, 2, io_done, NULL);
1827 	CU_ASSERT(rc == 0);
1828 	CU_ASSERT(g_io_done == false);
1829 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1830 	stub_complete_io(1);
1831 	CU_ASSERT(g_io_done == true);
1832 
1833 	/* Test a FLUSH.  This should also not be split. */
1834 	bdev->optimal_io_boundary = 16;
1835 	g_io_done = false;
1836 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_FLUSH, 15, 2, 0);
1837 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1838 
1839 	rc = spdk_bdev_flush_blocks(desc, io_ch, 15, 2, io_done, NULL);
1840 	CU_ASSERT(rc == 0);
1841 	CU_ASSERT(g_io_done == false);
1842 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1843 	stub_complete_io(1);
1844 	CU_ASSERT(g_io_done == true);
1845 
1846 	/* Test a COPY.  This should also not be split. */
1847 	bdev->optimal_io_boundary = 15;
1848 	g_io_done = false;
1849 	expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, 9, 45, 36);
1850 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1851 
1852 	rc = spdk_bdev_copy_blocks(desc, io_ch, 9, 45, 36, io_done, NULL);
1853 	CU_ASSERT(rc == 0);
1854 	CU_ASSERT(g_io_done == false);
1855 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1856 	stub_complete_io(1);
1857 	CU_ASSERT(g_io_done == true);
1858 
1859 	CU_ASSERT(TAILQ_EMPTY(&g_bdev_ut_channel->expected_io));
1860 
1861 	/* Child requests return an error status */
1862 	bdev->optimal_io_boundary = 16;
1863 	iov[0].iov_base = (void *)0x10000;
1864 	iov[0].iov_len = 512 * 64;
1865 	g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED;
1866 	g_io_done = false;
1867 	g_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1868 
1869 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, 1, 1, 64, io_done, NULL);
1870 	CU_ASSERT(rc == 0);
1871 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 5);
1872 	stub_complete_io(4);
1873 	CU_ASSERT(g_io_done == false);
1874 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
1875 	stub_complete_io(1);
1876 	CU_ASSERT(g_io_done == true);
1877 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
1878 
1879 	/* Test that a multi-vector command is terminated with failure, without continuing
1880 	 * the splitting process, when one of its child I/Os fails.
1881 	 * The multi-vector command is the same as the one above that needs to be split on
1882 	 * the boundary and then split further due to the capacity of child iovs.
1883 	 */
1884 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 1; i++) {
1885 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
1886 		iov[i].iov_len = 512;
1887 	}
1888 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_base = (void *)(SPDK_BDEV_IO_NUM_CHILD_IOV * 0x10000);
1889 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_len = 256;
1890 
1891 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 1) * 0x10000);
1892 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV].iov_len = 256;
1893 
1894 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_base = (void *)((SPDK_BDEV_IO_NUM_CHILD_IOV + 2) * 0x10000);
1895 	iov[SPDK_BDEV_IO_NUM_CHILD_IOV + 1].iov_len = 512;
1896 
1897 	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
1898 
1899 	g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED;
1900 	g_io_done = false;
1901 	g_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1902 
1903 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, 0,
1904 				    SPDK_BDEV_IO_NUM_CHILD_IOV + 1, io_done, NULL);
1905 	CU_ASSERT(rc == 0);
1906 	CU_ASSERT(g_io_done == false);
1907 
1908 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1909 	stub_complete_io(1);
1910 	CU_ASSERT(g_io_done == true);
1911 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
1912 
1913 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
1914 
1915 	/* For this test we will create the following conditions to hit the code path where
1916 	 * we are trying to send an IO following a split that has no iovs because we had to
1917 	 * trim them for alignment reasons.
1918 	 *
1919 	 * - 16K boundary, our IO will start at offset 0 with a length of 0x4200
1920 	 * - Our IOVs are 0x212 in size so that we run into the 16K boundary at child IOV
1921 	 *   position 30 and overshoot by 0x2e.
1922 	 * - That means the split has to stop at child IOV index 31; trimming the 0x2e
1923 	 *   overshoot for alignment eliminates that vector entirely, so we just send the
1924 	 *   first split IO with 31 vectors (index 30 shortened to stay block-aligned)
1925 	 *   and let the completion pick up the last 2 vectors.
1926 	 */
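	/* Worked arithmetic for the setup above (512-byte blocks, 0x4000 boundary):
	 *   31 iovs * 0x212 = 0x402e, i.e. 0x2e bytes past the 16K boundary, so
	 *   child iov index 30 is trimmed to 0x212 - 0x2e = 0x1e4 and index 31 ends
	 *   up empty. The 2nd child then carries 0x2e + 0x1d2 = 0x200 bytes
	 *   (1 block), completing the 33-block (0x4200-byte) parent IO.
	 */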
1927 	bdev->optimal_io_boundary = 32;
1928 	bdev->split_on_optimal_io_boundary = true;
1929 	g_io_done = false;
1930 
1931 	/* Init all parent IOVs to 0x212 */
1932 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV + 2; i++) {
1933 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
1934 		iov[i].iov_len = 0x212;
1935 	}
1936 
1937 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, SPDK_BDEV_IO_NUM_CHILD_IOV,
1938 					   SPDK_BDEV_IO_NUM_CHILD_IOV - 1);
1939 	/* expect child iovs 0-29 to map 1:1 to the parent iovs */
1940 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
1941 		ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, iov[i].iov_len);
1942 	}
1943 
1944 	/* expect index 30 to be shortened to 0x1e4 (0x212 - 0x2e) because of the alignment,
1945 	 * where 0x2e is the amount we overshot the 16K boundary
1946 	 */
1947 	ut_expected_io_set_iov(expected_io, SPDK_BDEV_IO_NUM_CHILD_IOV - 2,
1948 			       (void *)(iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 2].iov_base), 0x1e4);
1949 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1950 
1951 	/* The 2nd child IO will have 2 remaining vectors: one picking up where the
1952 	 * shortened vector left off, taking it to the next boundary, and then a final
1953 	 * one to get us to 0x4200 bytes for the IO.
1954 	 */
1955 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV,
1956 					   SPDK_BDEV_IO_NUM_CHILD_IOV, 2);
1957 	/* position 30 picked up the remaining bytes to the next boundary */
1958 	ut_expected_io_set_iov(expected_io, 0,
1959 			       (void *)(iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 2].iov_base + 0x1e4), 0x2e);
1960 
1961 	/* position 31 picked up the rest of the transfer to get us to 0x4200 */
1962 	ut_expected_io_set_iov(expected_io, 1,
1963 			       (void *)(iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 1].iov_base), 0x1d2);
1964 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
1965 
1966 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV + 1, 0,
1967 				    SPDK_BDEV_IO_NUM_CHILD_IOV + 1, io_done, NULL);
1968 	CU_ASSERT(rc == 0);
1969 	CU_ASSERT(g_io_done == false);
1970 
1971 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1972 	stub_complete_io(1);
1973 	CU_ASSERT(g_io_done == false);
1974 
1975 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
1976 	stub_complete_io(1);
1977 	CU_ASSERT(g_io_done == true);
1978 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
1979 
1980 	spdk_put_io_channel(io_ch);
1981 	spdk_bdev_close(desc);
1982 	free_bdev(bdev);
1983 	ut_fini_bdev();
1984 }
1985 
1986 static void
1987 bdev_io_max_size_and_segment_split_test(void)
1988 {
1989 	struct spdk_bdev *bdev;
1990 	struct spdk_bdev_desc *desc = NULL;
1991 	struct spdk_io_channel *io_ch;
1992 	struct spdk_bdev_opts bdev_opts = {};
1993 	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 2];
1994 	struct ut_expected_io *expected_io;
1995 	uint64_t i;
1996 	int rc;
1997 
1998 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
1999 	bdev_opts.bdev_io_pool_size = 512;
2000 	bdev_opts.bdev_io_cache_size = 64;
2001 	bdev_opts.opts_size = sizeof(bdev_opts);
2002 	ut_init_bdev(&bdev_opts);
2003 
2004 	bdev = allocate_bdev("bdev0");
2005 
2006 	rc = spdk_bdev_open_ext(bdev->name, true, bdev_ut_event_cb, NULL, &desc);
2007 	CU_ASSERT(rc == 0);
2008 	SPDK_CU_ASSERT_FATAL(desc != NULL);
2009 	io_ch = spdk_bdev_get_io_channel(desc);
2010 	CU_ASSERT(io_ch != NULL);
2011 
2012 	bdev->split_on_optimal_io_boundary = false;
2013 	bdev->optimal_io_boundary = 0;
2014 
2015 	/* Case 0: max_num_segments == 0 (no segment-count limit),
2016 	 * but the 2 * 512 transfer exceeds max_segment_size (512).
2017 	 */
2018 	bdev->max_segment_size = 512;
2019 	bdev->max_num_segments = 0;
2020 	g_io_done = false;
2021 
2022 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 2);
2023 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512);
2024 	ut_expected_io_set_iov(expected_io, 1, (void *)(0xF000 + 512), 512);
2025 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2026 
2027 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 2, io_done, NULL);
2028 	CU_ASSERT(rc == 0);
2029 	CU_ASSERT(g_io_done == false);
2030 
2031 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2032 	stub_complete_io(1);
2033 	CU_ASSERT(g_io_done == true);
2034 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2035 
2036 	/* Case 1: max_segment_size == 0 (no segment-size limit),
2037 	 * but the iov count (2) exceeds max_num_segments (1).
2038 	 */
2039 	bdev->max_segment_size = 0;
2040 	bdev->max_num_segments = 1;
2041 	g_io_done = false;
2042 
2043 	iov[0].iov_base = (void *)0x10000;
2044 	iov[0].iov_len = 512;
2045 	iov[1].iov_base = (void *)0x20000;
2046 	iov[1].iov_len = 8 * 512;
2047 
2048 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 1, 1);
2049 	ut_expected_io_set_iov(expected_io, 0, iov[0].iov_base, iov[0].iov_len);
2050 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2051 
2052 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 15, 8, 1);
2053 	ut_expected_io_set_iov(expected_io, 0, iov[1].iov_base, iov[1].iov_len);
2054 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2055 
2056 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, 2, 14, 9, io_done, NULL);
2057 	CU_ASSERT(rc == 0);
2058 	CU_ASSERT(g_io_done == false);
2059 
2060 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
2061 	stub_complete_io(2);
2062 	CU_ASSERT(g_io_done == true);
2063 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2064 
2065 	/* Test that a non-vector command is split correctly.
2066 	 * Set up the expected values before calling spdk_bdev_read_blocks
2067 	 */
2068 	bdev->max_segment_size = 512;
2069 	bdev->max_num_segments = 1;
2070 	g_io_done = false;
2071 
2072 	/* Child IO 0 */
2073 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 1, 1);
2074 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512);
2075 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2076 
2077 	/* Child IO 1 */
2078 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 15, 1, 1);
2079 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 1 * 512), 512);
2080 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2081 
2082 	/* spdk_bdev_read_blocks will submit the first child immediately. */
2083 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 2, io_done, NULL);
2084 	CU_ASSERT(rc == 0);
2085 	CU_ASSERT(g_io_done == false);
2086 
2087 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
2088 	stub_complete_io(2);
2089 	CU_ASSERT(g_io_done == true);
2090 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2091 
2092 	/* Now set up a more complex, multi-vector command that needs to be split,
2093 	 * including splitting iovecs.
2094 	 */
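	/* Expected split arithmetic for this case: 2 + 4 + 6 = 12 blocks in total;
	 * with max_segment_size = 1024 and one segment per child, every child
	 * carries exactly 2 blocks, giving the 6 child IOs set up below.
	 */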
2095 	bdev->max_segment_size = 2 * 512;
2096 	bdev->max_num_segments = 1;
2097 	g_io_done = false;
2098 
2099 	iov[0].iov_base = (void *)0x10000;
2100 	iov[0].iov_len = 2 * 512;
2101 	iov[1].iov_base = (void *)0x20000;
2102 	iov[1].iov_len = 4 * 512;
2103 	iov[2].iov_base = (void *)0x30000;
2104 	iov[2].iov_len = 6 * 512;
2105 
2106 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
2107 	ut_expected_io_set_iov(expected_io, 0, iov[0].iov_base, 512 * 2);
2108 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2109 
2110 	/* iov[1] is split into 2 segment entries, each becoming its own child IO */
2111 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 2, 1);
2112 	ut_expected_io_set_iov(expected_io, 0, iov[1].iov_base, 512 * 2);
2113 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2114 
2115 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 18, 2, 1);
2116 	ut_expected_io_set_iov(expected_io, 0, iov[1].iov_base + 512 * 2, 512 * 2);
2117 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2118 
2119 	/* iov[2] is split into 3 segment entries, each becoming its own child IO */
2120 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 20, 2, 1);
2121 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base, 512 * 2);
2122 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2123 
2124 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 22, 2, 1);
2125 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base + 512 * 2, 512 * 2);
2126 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2127 
2128 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 24, 2, 1);
2129 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base + 512 * 4, 512 * 2);
2130 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2131 
2132 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, 3, 14, 12, io_done, NULL);
2133 	CU_ASSERT(rc == 0);
2134 	CU_ASSERT(g_io_done == false);
2135 
2136 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 6);
2137 	stub_complete_io(6);
2138 	CU_ASSERT(g_io_done == true);
2139 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2140 
2141 	/* Test a multi-vector command that needs to be split by segment size and then
2142 	 * split further due to the capacity of the parent IO's child iovs.
2143 	 */
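	/* Split arithmetic: 32 parent iovs * 2 blocks each, split into 512-byte
	 * single-segment children, yields 64 child IOs. Only SPDK_BDEV_IO_NUM_CHILD_IOV
	 * (32) child iov entries are available per round, so the split takes
	 * two rounds of 32 children each.
	 */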
2144 	bdev->max_segment_size = 512;
2145 	bdev->max_num_segments = 1;
2146 	g_io_done = false;
2147 
2148 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
2149 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2150 		iov[i].iov_len = 512 * 2;
2151 	}
2152 
2153 	/* Each input iov is split into 2 single-segment child IOs, so half of the
2154 	 * input iovs are enough to fill all child iov entries of one split round.
2155 	 */
2156 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV / 2; i++) {
2157 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 2 * i, 1, 1);
2158 		ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base, 512);
2159 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2160 
2161 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 2 * i + 1, 1, 1);
2162 		ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base + 512, 512);
2163 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2164 	}
2165 
2166 	/* The remaining iovs are split in the second round */
2167 	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV / 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
2168 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 2, 1, 1);
2169 		ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base, 512);
2170 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2171 
2172 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 2 + 1, 1, 1);
2173 		ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base + 512, 512);
2174 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2175 	}
2176 
2177 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV, 0,
2178 				    SPDK_BDEV_IO_NUM_CHILD_IOV * 2, io_done, NULL);
2179 	CU_ASSERT(rc == 0);
2180 	CU_ASSERT(g_io_done == false);
2181 
2182 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == SPDK_BDEV_IO_NUM_CHILD_IOV);
2183 	stub_complete_io(SPDK_BDEV_IO_NUM_CHILD_IOV);
2184 	CU_ASSERT(g_io_done == false);
2185 
2186 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == SPDK_BDEV_IO_NUM_CHILD_IOV);
2187 	stub_complete_io(SPDK_BDEV_IO_NUM_CHILD_IOV);
2188 	CU_ASSERT(g_io_done == true);
2189 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2190 
2191 	/* An error case: a divided child IO is not
2192 	 * a multiple of the block size,
2193 	 * so the split exits with an error.
2194 	 */
2195 	bdev->max_segment_size = 512;
2196 	bdev->max_num_segments = 1;
2197 	g_io_done = false;
2198 
2199 	iov[0].iov_base = (void *)0x10000;
2200 	iov[0].iov_len = 512 + 256;
2201 	iov[1].iov_base = (void *)0x20000;
2202 	iov[1].iov_len = 256;
2203 
2204 	/* iov[0] is split into 512 and 256 bytes.
2205 	 * 256 bytes is less than one block, and the next round
2206 	 * of splitting finds a first child IO smaller than
2207 	 * the block size, so it exits with an error.
2208 	 */
2209 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 1, 1);
2210 	ut_expected_io_set_iov(expected_io, 0, iov[0].iov_base, 512);
2211 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2212 
2213 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, 2, 0, 2, io_done, NULL);
2214 	CU_ASSERT(rc == 0);
2215 	CU_ASSERT(g_io_done == false);
2216 
2217 	/* First child IO is OK */
2218 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2219 	stub_complete_io(1);
2220 	CU_ASSERT(g_io_done == true);
2221 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2222 
2223 	/* error exit */
2224 	stub_complete_io(1);
2225 	CU_ASSERT(g_io_done == true);
2226 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
2227 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2228 
2229 	/* Test a multi-vector command that needs to be split by segment size and then
2230 	 * split further due to the capacity of child iovs.
2231 	 *
2232 	 * In this case, the last two iovs need to be split, but that would exceed the
2233 	 * capacity of child iovs, so the split must wait until the first batch completes.
2234 	 */
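	/* Entry accounting: the first child fills all 32 child iov entries
	 * (30 unsplit 512-byte iovs plus the two halves of the first 1024-byte
	 * iov); the two halves of the last iov no longer fit and become the
	 * second child in the next round.
	 */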
2235 	bdev->max_segment_size = 512;
2236 	bdev->max_num_segments = SPDK_BDEV_IO_NUM_CHILD_IOV;
2237 	g_io_done = false;
2238 
2239 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2240 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2241 		iov[i].iov_len = 512;
2242 	}
2243 	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV; i++) {
2244 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2245 		iov[i].iov_len = 512 * 2;
2246 	}
2247 
2248 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0,
2249 					   SPDK_BDEV_IO_NUM_CHILD_IOV, SPDK_BDEV_IO_NUM_CHILD_IOV);
2250 	/* iovs [0, SPDK_BDEV_IO_NUM_CHILD_IOV - 2) will not be split */
2251 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2252 		ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, iov[i].iov_len);
2253 	}
2254 	/* iov[SPDK_BDEV_IO_NUM_CHILD_IOV - 2] is split into two entries */
2255 	ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, 512);
2256 	ut_expected_io_set_iov(expected_io, i + 1, iov[i].iov_base + 512, 512);
2257 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2258 
2259 	/* The child iov entries would exceed the parent IO's capacity, so the last iov is split in the next round */
2260 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV, 2, 2);
2261 	ut_expected_io_set_iov(expected_io, 0, iov[i + 1].iov_base, 512);
2262 	ut_expected_io_set_iov(expected_io, 1, iov[i + 1].iov_base + 512, 512);
2263 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2264 
2265 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV, 0,
2266 				    SPDK_BDEV_IO_NUM_CHILD_IOV + 2, io_done, NULL);
2267 	CU_ASSERT(rc == 0);
2268 	CU_ASSERT(g_io_done == false);
2269 
2270 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2271 	stub_complete_io(1);
2272 	CU_ASSERT(g_io_done == false);
2273 
2274 	/* Next round */
2275 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2276 	stub_complete_io(1);
2277 	CU_ASSERT(g_io_done == true);
2278 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2279 
2280 	/* This case is similar to the previous one, but the data covered by
2281 	 * the last few child iov entries is less than one blocklen, so they
2282 	 * cannot be put into this IO and must wait for the next one.
2283 	 */
2284 	bdev->max_segment_size = 512;
2285 	bdev->max_num_segments = SPDK_BDEV_IO_NUM_CHILD_IOV;
2286 	g_io_done = false;
2287 
2288 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2289 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2290 		iov[i].iov_len = 512;
2291 	}
2292 
2293 	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV + 2; i++) {
2294 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2295 		iov[i].iov_len = 128;
2296 	}
2297 
2298 	/* The first child's iovcnt isn't SPDK_BDEV_IO_NUM_CHILD_IOV but SPDK_BDEV_IO_NUM_CHILD_IOV - 2,
2299 	 * because the remaining 2 iovs are not enough for one blocklen.
2300 	 */
2301 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0,
2302 					   SPDK_BDEV_IO_NUM_CHILD_IOV - 2, SPDK_BDEV_IO_NUM_CHILD_IOV - 2);
2303 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2304 		ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, iov[i].iov_len);
2305 	}
2306 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2307 
2308 	/* The second child IO waits until the first child IO completes before executing,
2309 	 * because the combined iovcnt of the two IOs exceeds the parent IO's child iov
2310 	 * capacity. It covers the 4 iovs from SPDK_BDEV_IO_NUM_CHILD_IOV - 2 to SPDK_BDEV_IO_NUM_CHILD_IOV + 1.
2311 	 */
2312 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, SPDK_BDEV_IO_NUM_CHILD_IOV - 2,
2313 					   1, 4);
2314 	ut_expected_io_set_iov(expected_io, 0, iov[i].iov_base, iov[i].iov_len);
2315 	ut_expected_io_set_iov(expected_io, 1, iov[i + 1].iov_base, iov[i + 1].iov_len);
2316 	ut_expected_io_set_iov(expected_io, 2, iov[i + 2].iov_base, iov[i + 2].iov_len);
2317 	ut_expected_io_set_iov(expected_io, 3, iov[i + 3].iov_base, iov[i + 3].iov_len);
2318 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2319 
2320 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV + 2, 0,
2321 				    SPDK_BDEV_IO_NUM_CHILD_IOV - 1, io_done, NULL);
2322 	CU_ASSERT(rc == 0);
2323 	CU_ASSERT(g_io_done == false);
2324 
2325 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2326 	stub_complete_io(1);
2327 	CU_ASSERT(g_io_done == false);
2328 
2329 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2330 	stub_complete_io(1);
2331 	CU_ASSERT(g_io_done == true);
2332 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2333 
2334 	/* A very complicated case. Each sg entry exceeds max_segment_size and
2335 	 * needs to be split; each child IO must remain a multiple of the blocklen;
2336 	 * and the total child iovcnt exceeds the parent's iovcnt.
2337 	 */
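	/* Per-round arithmetic for this case: the first 30 parent iovs are
	 * 512 + 256 = 768 bytes each, so each for() round below maps 4 parent
	 * iovs = 3072 bytes = 6 blocks onto 3 child IOs, spending 9 child iov
	 * entries (one of them reserved but unused to keep block alignment).
	 */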
2338 	bdev->max_segment_size = 512 + 128;
2339 	bdev->max_num_segments = 3;
2340 	g_io_done = false;
2341 
2342 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i++) {
2343 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2344 		iov[i].iov_len = 512 + 256;
2345 	}
2346 
2347 	for (i = SPDK_BDEV_IO_NUM_CHILD_IOV - 2; i < SPDK_BDEV_IO_NUM_CHILD_IOV + 2; i++) {
2348 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2349 		iov[i].iov_len = 512 + 128;
2350 	}
2351 
2352 	/* Child IOs use 9 child iov entries per for() round, 3 * 9 = 27 in total.
2353 	 * Each round consumes 4 parent IO iov entries and 6 blocks,
2354 	 * generating 3 child IOs per round, 9 in all.
2355 	 */
2356 	for (i = 0; i < 3; i++) {
2357 		uint32_t j = i * 4;
2358 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 6, 2, 3);
2359 		ut_expected_io_set_iov(expected_io, 0, iov[j].iov_base, 640);
2360 		ut_expected_io_set_iov(expected_io, 1, iov[j].iov_base + 640, 128);
2361 		ut_expected_io_set_iov(expected_io, 2, iov[j + 1].iov_base, 256);
2362 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2363 
2364 		/* Each child IO must be a multiple of the blocklen, so
2365 		 * iov[j + 2] must be split. If its third entry were also added,
2366 		 * the multiple-of-blocklen guarantee would break, yet that entry
2367 		 * still occupies one of the parent's child iov entries.
2368 		 */
2369 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 6 + 2, 2, 2);
2370 		ut_expected_io_set_iov(expected_io, 0, iov[j + 1].iov_base + 256, 512);
2371 		ut_expected_io_set_iov(expected_io, 1, iov[j + 2].iov_base, 512);
2372 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2373 
2374 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, i * 6 + 4, 2, 3);
2375 		ut_expected_io_set_iov(expected_io, 0, iov[j + 2].iov_base + 512, 256);
2376 		ut_expected_io_set_iov(expected_io, 1, iov[j + 3].iov_base, 640);
2377 		ut_expected_io_set_iov(expected_io, 2, iov[j + 3].iov_base + 640, 128);
2378 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2379 	}
2380 
2381 	/* The 10th child IO starts at child iov position 27;
2382 	 * its parent iov entry index is 3 * 4 = 12 and its block offset is 3 * 6 = 18.
2383 	 */
2384 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 18, 2, 3);
2385 	ut_expected_io_set_iov(expected_io, 0, iov[12].iov_base, 640);
2386 	ut_expected_io_set_iov(expected_io, 1, iov[12].iov_base + 640, 128);
2387 	ut_expected_io_set_iov(expected_io, 2, iov[13].iov_base, 256);
2388 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2389 
2390 	/* The 11th child IO starts at child iov position 30 */
2391 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 20, 2, 2);
2392 	ut_expected_io_set_iov(expected_io, 0, iov[13].iov_base + 256, 512);
2393 	ut_expected_io_set_iov(expected_io, 1, iov[14].iov_base, 512);
2394 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2395 
2396 	/* The 12th child IO opens the 2nd split round, with child iovpos back at 0 */
2397 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 22, 2, 3);
2398 	ut_expected_io_set_iov(expected_io, 0, iov[14].iov_base + 512, 256);
2399 	ut_expected_io_set_iov(expected_io, 1, iov[15].iov_base, 640);
2400 	ut_expected_io_set_iov(expected_io, 2, iov[15].iov_base + 640, 128);
2401 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2402 
2403 	/* This loop again generates 9 child IOs using 27 child iov entries,
2404 	 * consuming 4 parent IO iov entries and 6 blocks per for() round.
2405 	 * The parent IO iov index starts from 16 and the block offset from 24.
2406 	 */
2407 	for (i = 0; i < 3; i++) {
2408 		uint32_t j = i * 4 + 16;
2409 		uint32_t offset = i * 6 + 24;
2410 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset, 2, 3);
2411 		ut_expected_io_set_iov(expected_io, 0, iov[j].iov_base, 640);
2412 		ut_expected_io_set_iov(expected_io, 1, iov[j].iov_base + 640, 128);
2413 		ut_expected_io_set_iov(expected_io, 2, iov[j + 1].iov_base, 256);
2414 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2415 
2416 		/* Each child IO must be a multiple of the blocklen, so
2417 		 * iov[j + 2] must be split. If its third entry were also added,
2418 		 * the multiple-of-blocklen guarantee would break, yet that entry
2419 		 * still occupies one of the parent's child iov entries.
2420 		 */
2421 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset + 2, 2, 2);
2422 		ut_expected_io_set_iov(expected_io, 0, iov[j + 1].iov_base + 256, 512);
2423 		ut_expected_io_set_iov(expected_io, 1, iov[j + 2].iov_base, 512);
2424 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2425 
2426 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset + 4, 2, 3);
2427 		ut_expected_io_set_iov(expected_io, 0, iov[j + 2].iov_base + 512, 256);
2428 		ut_expected_io_set_iov(expected_io, 1, iov[j + 3].iov_base, 640);
2429 		ut_expected_io_set_iov(expected_io, 2, iov[j + 3].iov_base + 640, 128);
2430 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2431 	}
2432 
2433 	/* The 22nd child IO, child iov position at 30 */
2434 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 42, 1, 1);
2435 	ut_expected_io_set_iov(expected_io, 0, iov[28].iov_base, 512);
2436 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2437 
2438 	/* The third round */
2439 	/* Here is the 23rd child IO and child iovpos is 0 */
2440 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 43, 2, 3);
2441 	ut_expected_io_set_iov(expected_io, 0, iov[28].iov_base + 512, 256);
2442 	ut_expected_io_set_iov(expected_io, 1, iov[29].iov_base, 640);
2443 	ut_expected_io_set_iov(expected_io, 2, iov[29].iov_base + 640, 128);
2444 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2445 
2446 	/* The 24th child IO */
2447 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 45, 3, 3);
2448 	ut_expected_io_set_iov(expected_io, 0, iov[30].iov_base, 640);
2449 	ut_expected_io_set_iov(expected_io, 1, iov[31].iov_base, 640);
2450 	ut_expected_io_set_iov(expected_io, 2, iov[32].iov_base, 256);
2451 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2452 
2453 	/* The 25th child IO */
2454 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 48, 2, 2);
2455 	ut_expected_io_set_iov(expected_io, 0, iov[32].iov_base + 256, 384);
2456 	ut_expected_io_set_iov(expected_io, 1, iov[33].iov_base, 640);
2457 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2458 
2459 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV + 2, 0,
2460 				    50, io_done, NULL);
2461 	CU_ASSERT(rc == 0);
2462 	CU_ASSERT(g_io_done == false);
2463 
2464 	/* The parent IO supports up to 32 child iovs, which works out to a
2465 	 * maximum of 11 child IOs split at a time; the splitting
2466 	 * continues after the first batch completes.
2467 	 */
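	/* Entry accounting for the 11-IO batch: the three for() rounds consume
	 * 3 * 9 = 27 child iov entries, the 10th child takes 3 more (30 total)
	 * and the 11th takes the final 2, filling all 32 entries before the
	 * split pauses until the batch completes.
	 */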
2468 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 11);
2469 	stub_complete_io(11);
2470 	CU_ASSERT(g_io_done == false);
2471 
2472 	/* The 2nd round */
2473 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 11);
2474 	stub_complete_io(11);
2475 	CU_ASSERT(g_io_done == false);
2476 
2477 	/* The last round */
2478 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
2479 	stub_complete_io(3);
2480 	CU_ASSERT(g_io_done == true);
2481 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2482 
2483 	/* Test a WRITE_ZEROES.  This should also not be split. */
2484 	bdev->max_segment_size = 512;
2485 	bdev->max_num_segments = 1;
2486 	g_io_done = false;
2487 
2488 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 9, 36, 0);
2489 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2490 
2491 	rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 9, 36, io_done, NULL);
2492 	CU_ASSERT(rc == 0);
2493 	CU_ASSERT(g_io_done == false);
2494 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2495 	stub_complete_io(1);
2496 	CU_ASSERT(g_io_done == true);
2497 
2498 	/* Test an UNMAP.  This should also not be split. */
2499 	g_io_done = false;
2500 
2501 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, 15, 4, 0);
2502 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2503 
2504 	rc = spdk_bdev_unmap_blocks(desc, io_ch, 15, 4, io_done, NULL);
2505 	CU_ASSERT(rc == 0);
2506 	CU_ASSERT(g_io_done == false);
2507 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2508 	stub_complete_io(1);
2509 	CU_ASSERT(g_io_done == true);
2510 
2511 	/* Test a FLUSH.  This should also not be split. */
2512 	g_io_done = false;
2513 
2514 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_FLUSH, 15, 4, 0);
2515 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2516 
2517 	rc = spdk_bdev_flush_blocks(desc, io_ch, 15, 4, io_done, NULL);
2518 	CU_ASSERT(rc == 0);
2519 	CU_ASSERT(g_io_done == false);
2520 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2521 	stub_complete_io(1);
2522 	CU_ASSERT(g_io_done == true);
2523 
2524 	/* Test a COPY.  This should also not be split. */
2525 	g_io_done = false;
2526 
2527 	expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, 9, 45, 36);
2528 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2529 
2530 	rc = spdk_bdev_copy_blocks(desc, io_ch, 9, 45, 36, io_done, NULL);
2531 	CU_ASSERT(rc == 0);
2532 	CU_ASSERT(g_io_done == false);
2533 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2534 	stub_complete_io(1);
2535 	CU_ASSERT(g_io_done == true);
2536 
2537 	spdk_put_io_channel(io_ch);
2538 	spdk_bdev_close(desc);
2539 	free_bdev(bdev);
2540 	ut_fini_bdev();
2541 }
2542 
2543 static void
2544 bdev_io_mix_split_test(void)
2545 {
2546 	struct spdk_bdev *bdev;
2547 	struct spdk_bdev_desc *desc = NULL;
2548 	struct spdk_io_channel *io_ch;
2549 	struct spdk_bdev_opts bdev_opts = {};
2550 	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 2];
2551 	struct ut_expected_io *expected_io;
2552 	uint64_t i;
2553 	int rc;
2554 
2555 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
2556 	bdev_opts.bdev_io_pool_size = 512;
2557 	bdev_opts.bdev_io_cache_size = 64;
2558 	ut_init_bdev(&bdev_opts);
2559 
2560 	bdev = allocate_bdev("bdev0");
2561 
2562 	rc = spdk_bdev_open_ext(bdev->name, true, bdev_ut_event_cb, NULL, &desc);
2563 	CU_ASSERT(rc == 0);
2564 	SPDK_CU_ASSERT_FATAL(desc != NULL);
2565 	io_ch = spdk_bdev_get_io_channel(desc);
2566 	CU_ASSERT(io_ch != NULL);
2567 
2568 	/* First case optimal_io_boundary == max_segment_size * max_num_segments */
2569 	bdev->split_on_optimal_io_boundary = true;
2570 	bdev->optimal_io_boundary = 16;
2571 
2572 	bdev->max_segment_size = 512;
2573 	bdev->max_num_segments = 16;
2574 	g_io_done = false;
2575 
2576 	/* An IO crossing the IO boundary requires a split.
2577 	 * Total: 2 child IOs.
2578 	 */
2579 
2580 	/* The 1st child IO's payload is split into multiple segment entries by max_segment_size */
2581 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 2);
2582 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512);
2583 	ut_expected_io_set_iov(expected_io, 1, (void *)(0xF000 + 512), 512);
2584 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2585 
2586 	/* The 2nd child IO's payload is split into multiple segment entries by max_segment_size */
2587 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 2, 2);
2588 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 512);
2589 	ut_expected_io_set_iov(expected_io, 1, (void *)(0xF000 + 3 * 512), 512);
2590 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2591 
2592 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 4, io_done, NULL);
2593 	CU_ASSERT(rc == 0);
2594 	CU_ASSERT(g_io_done == false);
2595 
2596 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
2597 	stub_complete_io(2);
2598 	CU_ASSERT(g_io_done == true);
2599 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2600 
2601 	/* Second case optimal_io_boundary > max_segment_size * max_num_segments */
2602 	bdev->max_segment_size = 15 * 512;
2603 	bdev->max_num_segments = 1;
2604 	g_io_done = false;
2605 
2606 	/* An IO crossing the IO boundary requires a split.
2607 	 * The 1st child IO's segment size exceeds max_segment_size,
2608 	 * so it is split into multiple segment entries,
2609 	 * and then into 2 child IOs because of max_num_segments.
2610 	 * Total: 3 child IOs.
2611 	 */
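	/* Sizing sketch: max_segment_size * max_num_segments = 15 * 512 * 1 covers
	 * 15 blocks, one block short of the 16-block boundary, so each boundary
	 * chunk needs two children (15 + 1) and the 2-block tail adds a third.
	 */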
2612 
2613 	/* The first 2 IOs fall within one IO boundary.
2614 	 * Because optimal_io_boundary > max_segment_size * max_num_segments,
2615 	 * the boundary chunk is split into these first 2 IOs.
2616 	 */
2617 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 15, 1);
2618 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512 * 15);
2619 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2620 
2621 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 15, 1, 1);
2622 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 512 * 15), 512);
2623 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2624 
2625 	/* The 3rd child IO exists because of the IO boundary */
2626 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 2, 1);
2627 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 512 * 16), 512 * 2);
2628 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2629 
2630 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 0, 18, io_done, NULL);
2631 	CU_ASSERT(rc == 0);
2632 	CU_ASSERT(g_io_done == false);
2633 
2634 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
2635 	stub_complete_io(3);
2636 	CU_ASSERT(g_io_done == true);
2637 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2638 
2639 	/* Third case optimal_io_boundary < max_segment_size * max_num_segments */
2640 	bdev->max_segment_size = 17 * 512;
2641 	bdev->max_num_segments = 1;
2642 	g_io_done = false;
2643 
2644 	/* An IO crossing the IO boundary requires a split.
2645 	 * The child IOs themselves are not split further.
2646 	 * Total: 2 child IOs.
2647 	 */
2648 
2649 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 16, 1);
2650 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 512 * 16);
2651 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2652 
2653 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 2, 1);
2654 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 512 * 16), 512 * 2);
2655 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2656 
2657 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 0, 18, io_done, NULL);
2658 	CU_ASSERT(rc == 0);
2659 	CU_ASSERT(g_io_done == false);
2660 
2661 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
2662 	stub_complete_io(2);
2663 	CU_ASSERT(g_io_done == true);
2664 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2665 
2666 	/* Now set up a more complex, multi-vector command that needs to be split,
2667 	 * including splitting iovecs.
2668 	 * optimal_io_boundary < max_segment_size * max_num_segments
2669 	 */
2670 	bdev->max_segment_size = 3 * 512;
2671 	bdev->max_num_segments = 6;
2672 	g_io_done = false;
2673 
2674 	iov[0].iov_base = (void *)0x10000;
2675 	iov[0].iov_len = 4 * 512;
2676 	iov[1].iov_base = (void *)0x20000;
2677 	iov[1].iov_len = 4 * 512;
2678 	iov[2].iov_base = (void *)0x30000;
2679 	iov[2].iov_len = 10 * 512;
2680 
2681 	/* An IO crossing the IO boundary requires a split.
2682 	 * The 1st child IO's segment size exceeds max_segment_size, and after
2683 	 * splitting on segment size, the segment count exceeds max_num_segments.
2684 	 * So the 1st boundary chunk is split into 2 child IOs.
2685 	 * Total: 3 child IOs.
2686 	 */
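	/* Sizing sketch: the 18-block write is 4 + 4 + 10 blocks across three iovs.
	 * Each 4-block iov splits into a 3-block and a 1-block segment, so the
	 * 6-segment limit is reached after 14 blocks; the remaining 2 blocks inside
	 * the boundary and the 2 blocks past it form the 2nd and 3rd children.
	 */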
2687 
2688 	/* The first 2 IOs fall within one IO boundary.
2689 	 * After splitting on segment size, the segment count exceeds max_num_segments,
2690 	 * so the chunk splits into 2 child IOs.
2691 	 */
2692 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 0, 14, 6);
2693 	ut_expected_io_set_iov(expected_io, 0, iov[0].iov_base, 512 * 3);
2694 	ut_expected_io_set_iov(expected_io, 1, iov[0].iov_base + 512 * 3, 512);
2695 	ut_expected_io_set_iov(expected_io, 2, iov[1].iov_base, 512 * 3);
2696 	ut_expected_io_set_iov(expected_io, 3, iov[1].iov_base + 512 * 3, 512);
2697 	ut_expected_io_set_iov(expected_io, 4, iov[2].iov_base, 512 * 3);
2698 	ut_expected_io_set_iov(expected_io, 5, iov[2].iov_base + 512 * 3, 512 * 3);
2699 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2700 
2701 	/* The 2nd and 3rd child IOs take the leftover segment entries */
2702 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
2703 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base + 512 * 6, 512 * 2);
2704 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2705 
2706 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 2, 1);
2707 	ut_expected_io_set_iov(expected_io, 0, iov[2].iov_base + 512 * 8, 512 * 2);
2708 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2709 
2710 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, 3, 0, 18, io_done, NULL);
2711 	CU_ASSERT(rc == 0);
2712 	CU_ASSERT(g_io_done == false);
2713 
2714 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
2715 	stub_complete_io(3);
2716 	CU_ASSERT(g_io_done == true);
2717 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2718 
2719 	/* A very complicated case. Each sg entry exceeds max_segment_size
2720 	 * and the IO is also split on the IO boundary.
2721 	 * optimal_io_boundary < max_segment_size * max_num_segments
2722 	 */
2723 	bdev->max_segment_size = 3 * 512;
2724 	bdev->max_num_segments = SPDK_BDEV_IO_NUM_CHILD_IOV;
2725 	g_io_done = false;
2726 
2727 	for (i = 0; i < 20; i++) {
2728 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
2729 		iov[i].iov_len = 512 * 4;
2730 	}
2731 
2732 	/* An IO crossing the IO boundary requires a split.
2733 	 * The 80-block length splits into 5 child IOs based on the offset and IO boundary.
2734 	 * Each iov entry needs to be split into 2 entries because of max_segment_size.
2735 	 * Total: 5 child IOs.
2736 	 */
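	/* Sizing sketch: 80 blocks split on 16-block boundaries gives 5 children;
	 * each child spans 4 parent iovs (4 * 4 blocks) and every iov splits into
	 * a 3-block plus a 1-block segment, so each child uses 8 iov entries.
	 */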
2737 
2738 	/* 4 iov entries fit within one IO boundary and each iov entry splits into 2,
2739 	 * so each child IO occupies 8 child iov entries.
2740 	 */
2741 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 0, 16, 8);
2742 	for (i = 0; i < 4; i++) {
2743 		int iovcnt = i * 2;
2744 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2745 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2746 	}
2747 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2748 
2749 	/* 2nd child IO and total 16 child iov entries of parent IO */
2750 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 16, 8);
2751 	for (i = 4; i < 8; i++) {
2752 		int iovcnt = (i - 4) * 2;
2753 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2754 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2755 	}
2756 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2757 
2758 	/* 3rd child IO and total 24 child iov entries of parent IO */
2759 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 16, 8);
2760 	for (i = 8; i < 12; i++) {
2761 		int iovcnt = (i - 8) * 2;
2762 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2763 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2764 	}
2765 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2766 
2767 	/* 4th child IO and total 32 child iov entries of parent IO */
2768 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 48, 16, 8);
2769 	for (i = 12; i < 16; i++) {
2770 		int iovcnt = (i - 12) * 2;
2771 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2772 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2773 	}
2774 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2775 
2776 	/* The 5th child IO; because of the child iov entry limit, it is
2777 	 * split off into the next round.
2778 	 */
2779 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 64, 16, 8);
2780 	for (i = 16; i < 20; i++) {
2781 		int iovcnt = (i - 16) * 2;
2782 		ut_expected_io_set_iov(expected_io, iovcnt, iov[i].iov_base, 512 * 3);
2783 		ut_expected_io_set_iov(expected_io, iovcnt + 1, iov[i].iov_base + 512 * 3, 512);
2784 	}
2785 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2786 
2787 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, 20, 0, 80, io_done, NULL);
2788 	CU_ASSERT(rc == 0);
2789 	CU_ASSERT(g_io_done == false);
2790 
2791 	/* First split round */
2792 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
2793 	stub_complete_io(4);
2794 	CU_ASSERT(g_io_done == false);
2795 
2796 	/* Second split round */
2797 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2798 	stub_complete_io(1);
2799 	CU_ASSERT(g_io_done == true);
2800 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2801 
2802 	spdk_put_io_channel(io_ch);
2803 	spdk_bdev_close(desc);
2804 	free_bdev(bdev);
2805 	ut_fini_bdev();
2806 }
2807 
2808 static void
2809 bdev_io_split_with_io_wait(void)
2810 {
2811 	struct spdk_bdev *bdev;
2812 	struct spdk_bdev_desc *desc = NULL;
2813 	struct spdk_io_channel *io_ch;
2814 	struct spdk_bdev_channel *channel;
2815 	struct spdk_bdev_mgmt_channel *mgmt_ch;
2816 	struct spdk_bdev_opts bdev_opts = {};
2817 	struct iovec iov[3];
2818 	struct ut_expected_io *expected_io;
2819 	int rc;
2820 
2821 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
2822 	bdev_opts.bdev_io_pool_size = 2;
2823 	bdev_opts.bdev_io_cache_size = 1;
2824 	ut_init_bdev(&bdev_opts);
2825 
2826 	bdev = allocate_bdev("bdev0");
2827 
2828 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
2829 	CU_ASSERT(rc == 0);
2830 	CU_ASSERT(desc != NULL);
2831 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
2832 	io_ch = spdk_bdev_get_io_channel(desc);
2833 	CU_ASSERT(io_ch != NULL);
2834 	channel = spdk_io_channel_get_ctx(io_ch);
2835 	mgmt_ch = channel->shared_resource->mgmt_ch;
2836 
2837 	bdev->optimal_io_boundary = 16;
2838 	bdev->split_on_optimal_io_boundary = true;
2839 
2840 	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, NULL);
2841 	CU_ASSERT(rc == 0);
2842 
2843 	/* Now test that a single-vector command is split correctly.
2844 	 * Offset 14, length 8, payload 0xF000
2845 	 *  Child - Offset 14, length 2, payload 0xF000
2846 	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
2847 	 *
2848 	 * Set up the expected values before calling spdk_bdev_read_blocks
2849 	 */
2850 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
2851 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
2852 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2853 
2854 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
2855 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
2856 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2857 
2858 	/* The following children will be submitted sequentially due to the limited
2859 	 * pool of spdk_bdev_io structures.
2860 	 */
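	/* Pool accounting (given bdev_io_pool_size = 2 set above): one spdk_bdev_io
	 * is held by the outstanding single-block read and one by the split parent,
	 * so the first child must sit on the io_wait_queue until the read completes
	 * and returns its spdk_bdev_io to the pool.
	 */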
2861 
2862 	/* The first child I/O will be queued to wait until an spdk_bdev_io becomes available */
2863 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, NULL);
2864 	CU_ASSERT(rc == 0);
2865 	CU_ASSERT(!TAILQ_EMPTY(&mgmt_ch->io_wait_queue));
2866 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2867 
2868 	/* Completing the first read I/O will submit the first child */
2869 	stub_complete_io(1);
2870 	CU_ASSERT(TAILQ_EMPTY(&mgmt_ch->io_wait_queue));
2871 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2872 
2873 	/* Completing the first child will submit the second child */
2874 	stub_complete_io(1);
2875 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2876 
2877 	/* Complete the second child I/O.  This should result in our callback getting
2878 	 * invoked since the parent I/O is now complete.
2879 	 */
2880 	stub_complete_io(1);
2881 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2882 
2883 	/* Now set up a more complex, multi-vector command that needs to be split,
2884 	 *  including splitting iovecs.
2885 	 */
2886 	iov[0].iov_base = (void *)0x10000;
2887 	iov[0].iov_len = 512;
2888 	iov[1].iov_base = (void *)0x20000;
2889 	iov[1].iov_len = 20 * 512;
2890 	iov[2].iov_base = (void *)0x30000;
2891 	iov[2].iov_len = 11 * 512;
2892 
2893 	g_io_done = false;
2894 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 2);
2895 	ut_expected_io_set_iov(expected_io, 0, (void *)0x10000, 512);
2896 	ut_expected_io_set_iov(expected_io, 1, (void *)0x20000, 512);
2897 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2898 
2899 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 16, 1);
2900 	ut_expected_io_set_iov(expected_io, 0, (void *)(0x20000 + 512), 16 * 512);
2901 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2902 
2903 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 2);
2904 	ut_expected_io_set_iov(expected_io, 0, (void *)(0x20000 + 17 * 512), 3 * 512);
2905 	ut_expected_io_set_iov(expected_io, 1, (void *)0x30000, 11 * 512);
2906 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2907 
2908 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, 3, 14, 32, io_done, NULL);
2909 	CU_ASSERT(rc == 0);
2910 	CU_ASSERT(g_io_done == false);
2911 
2912 	/* The following children will be submitted sequentially due to the limited
2913 	 * pool of spdk_bdev_io structures.
2914 	 */
2915 
2916 	/* Completing the first child will submit the second child */
2917 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2918 	stub_complete_io(1);
2919 	CU_ASSERT(g_io_done == false);
2920 
2921 	/* Completing the second child will submit the third child */
2922 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2923 	stub_complete_io(1);
2924 	CU_ASSERT(g_io_done == false);
2925 
2926 	/* Completing the third child will result in our callback getting invoked
2927 	 * since the parent I/O is now complete.
2928 	 */
2929 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
2930 	stub_complete_io(1);
2931 	CU_ASSERT(g_io_done == true);
2932 
2933 	CU_ASSERT(TAILQ_EMPTY(&g_bdev_ut_channel->expected_io));
2934 
2935 	spdk_put_io_channel(io_ch);
2936 	spdk_bdev_close(desc);
2937 	free_bdev(bdev);
2938 	ut_fini_bdev();
2939 }
2940 
2941 static void
2942 bdev_io_write_unit_split_test(void)
2943 {
2944 	struct spdk_bdev *bdev;
2945 	struct spdk_bdev_desc *desc = NULL;
2946 	struct spdk_io_channel *io_ch;
2947 	struct spdk_bdev_opts bdev_opts = {};
2948 	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 4];
2949 	struct ut_expected_io *expected_io;
2950 	uint64_t i;
2951 	int rc;
2952 
2953 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
2954 	bdev_opts.bdev_io_pool_size = 512;
2955 	bdev_opts.bdev_io_cache_size = 64;
2956 	ut_init_bdev(&bdev_opts);
2957 
2958 	bdev = allocate_bdev("bdev0");
2959 
2960 	rc = spdk_bdev_open_ext(bdev->name, true, bdev_ut_event_cb, NULL, &desc);
2961 	CU_ASSERT(rc == 0);
2962 	SPDK_CU_ASSERT_FATAL(desc != NULL);
2963 	io_ch = spdk_bdev_get_io_channel(desc);
2964 	CU_ASSERT(io_ch != NULL);
2965 
2966 	/* Write I/O 2x larger than write_unit_size should get split into 2 I/Os */
2967 	bdev->write_unit_size = 32;
2968 	bdev->split_on_write_unit = true;
2969 	g_io_done = false;
2970 
2971 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 0, 32, 1);
2972 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 32 * 512);
2973 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2974 
2975 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 32, 1);
2976 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 32 * 512), 32 * 512);
2977 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
2978 
2979 	rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 0, 64, io_done, NULL);
2980 	CU_ASSERT(rc == 0);
2981 	CU_ASSERT(g_io_done == false);
2982 
2983 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
2984 	stub_complete_io(2);
2985 	CU_ASSERT(g_io_done == true);
2986 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
2987 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
2988 
2989 	/* Same as above but with optimal_io_boundary < write_unit_size - the I/O should be split
2990 	 * based on write_unit_size, not optimal_io_boundary */
2991 	bdev->split_on_optimal_io_boundary = true;
2992 	bdev->optimal_io_boundary = 16;
2993 	g_io_done = false;
2994 
2995 	rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 0, 64, io_done, NULL);
2996 	CU_ASSERT(rc == 0);
2997 	CU_ASSERT(g_io_done == false);
2998 
2999 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3000 	stub_complete_io(2);
3001 	CU_ASSERT(g_io_done == true);
3002 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3003 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3004 
3005 	/* Write I/O should fail if it is smaller than write_unit_size */
3006 	g_io_done = false;
3007 
3008 	rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 0, 31, io_done, NULL);
3009 	CU_ASSERT(rc == 0);
3010 	CU_ASSERT(g_io_done == false);
3011 
3012 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3013 	poll_threads();
3014 	CU_ASSERT(g_io_done == true);
3015 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3016 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
3017 
3018 	/* Same for I/O not aligned to write_unit_size */
3019 	g_io_done = false;
3020 
3021 	rc = spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 1, 32, io_done, NULL);
3022 	CU_ASSERT(rc == 0);
3023 	CU_ASSERT(g_io_done == false);
3024 
3025 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3026 	poll_threads();
3027 	CU_ASSERT(g_io_done == true);
3028 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3029 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
3030 
3031 	/* Write should fail if it needs to be split but there are not enough iovs to submit
3032 	 * an entire write unit */
3033 	bdev->write_unit_size = SPDK_COUNTOF(iov) / 2;
3034 	g_io_done = false;
3035 
3036 	for (i = 0; i < SPDK_COUNTOF(iov); i++) {
3037 		iov[i].iov_base = (void *)(0x1000 + 512 * i);
3038 		iov[i].iov_len = 512;
3039 	}
3040 
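	/* Each child would have to carry a full write unit of
	 * SPDK_COUNTOF(iov) / 2 = 2 * SPDK_BDEV_IO_NUM_CHILD_IOV blocks. With
	 * single-block iovecs that exceeds the per-child iovec limit, so the
	 * split is impossible and the I/O is expected to fail.
	 */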
3041 	rc = spdk_bdev_writev_blocks(desc, io_ch, iov, SPDK_COUNTOF(iov), 0, SPDK_COUNTOF(iov),
3042 				     io_done, NULL);
3043 	CU_ASSERT(rc == 0);
3044 	CU_ASSERT(g_io_done == false);
3045 
3046 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3047 	poll_threads();
3048 	CU_ASSERT(g_io_done == true);
3049 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
3050 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
3051 
3052 	spdk_put_io_channel(io_ch);
3053 	spdk_bdev_close(desc);
3054 	free_bdev(bdev);
3055 	ut_fini_bdev();
3056 }
3057 
3058 static void
3059 bdev_io_alignment(void)
3060 {
3061 	struct spdk_bdev *bdev;
3062 	struct spdk_bdev_desc *desc = NULL;
3063 	struct spdk_io_channel *io_ch;
3064 	struct spdk_bdev_opts bdev_opts = {};
3065 	int rc;
3066 	void *buf = NULL;
3067 	struct iovec iovs[2];
3068 	int iovcnt;
3069 	uint64_t alignment;
3070 
3071 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
3072 	bdev_opts.bdev_io_pool_size = 20;
3073 	bdev_opts.bdev_io_cache_size = 2;
3074 	ut_init_bdev(&bdev_opts);
3075 
3076 	fn_table.submit_request = stub_submit_request_get_buf;
3077 	bdev = allocate_bdev("bdev0");
3078 
3079 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
3080 	CU_ASSERT(rc == 0);
3081 	CU_ASSERT(desc != NULL);
3082 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3083 	io_ch = spdk_bdev_get_io_channel(desc);
3084 	CU_ASSERT(io_ch != NULL);
3085 
3086 	/* Create aligned buffer */
3087 	rc = posix_memalign(&buf, 4096, 8192);
3088 	SPDK_CU_ASSERT_FATAL(rc == 0);
3089 
3090 	/* Pass aligned single buffer with no alignment required */
3091 	alignment = 1;
3092 	bdev->required_alignment = spdk_u32log2(alignment);
3093 
3094 	rc = spdk_bdev_write_blocks(desc, io_ch, buf, 0, 1, io_done, NULL);
3095 	CU_ASSERT(rc == 0);
3096 	stub_complete_io(1);
3097 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3098 				    alignment));
3099 
3100 	rc = spdk_bdev_read_blocks(desc, io_ch, buf, 0, 1, io_done, NULL);
3101 	CU_ASSERT(rc == 0);
3102 	stub_complete_io(1);
3103 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3104 				    alignment));
3105 
3106 	/* Pass unaligned single buffer with no alignment required */
3107 	alignment = 1;
3108 	bdev->required_alignment = spdk_u32log2(alignment);
3109 
3110 	rc = spdk_bdev_write_blocks(desc, io_ch, buf + 4, 0, 1, io_done, NULL);
3111 	CU_ASSERT(rc == 0);
3112 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3113 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == buf + 4);
3114 	stub_complete_io(1);
3115 
3116 	rc = spdk_bdev_read_blocks(desc, io_ch, buf + 4, 0, 1, io_done, NULL);
3117 	CU_ASSERT(rc == 0);
3118 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3119 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == buf + 4);
3120 	stub_complete_io(1);
3121 
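	/* When a payload does not meet required_alignment, the bdev layer is
	 * expected to bounce it: u.bdev.iovs is redirected to
	 * internal.bounce_iov and internal.orig_iovcnt keeps the caller's iov
	 * count until completion resets it to 0.
	 */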
3122 	/* Pass unaligned single buffer with 512 alignment required */
3123 	alignment = 512;
3124 	bdev->required_alignment = spdk_u32log2(alignment);
3125 
3126 	rc = spdk_bdev_write_blocks(desc, io_ch, buf + 4, 0, 1, io_done, NULL);
3127 	CU_ASSERT(rc == 0);
3128 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 1);
3129 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3130 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3131 				    alignment));
3132 	stub_complete_io(1);
3133 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3134 
3135 	rc = spdk_bdev_read_blocks(desc, io_ch, buf + 4, 0, 1, io_done, NULL);
3136 	CU_ASSERT(rc == 0);
3137 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 1);
3138 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3139 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3140 				    alignment));
3141 	stub_complete_io(1);
3142 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3143 
3144 	/* Pass unaligned single buffer with 4096 alignment required */
3145 	alignment = 4096;
3146 	bdev->required_alignment = spdk_u32log2(alignment);
3147 
3148 	rc = spdk_bdev_write_blocks(desc, io_ch, buf + 8, 0, 1, io_done, NULL);
3149 	CU_ASSERT(rc == 0);
3150 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 1);
3151 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3152 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3153 				    alignment));
3154 	stub_complete_io(1);
3155 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3156 
3157 	rc = spdk_bdev_read_blocks(desc, io_ch, buf + 8, 0, 1, io_done, NULL);
3158 	CU_ASSERT(rc == 0);
3159 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 1);
3160 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3161 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3162 				    alignment));
3163 	stub_complete_io(1);
3164 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3165 
3166 	/* Pass aligned iovs with no alignment required */
3167 	alignment = 1;
3168 	bdev->required_alignment = spdk_u32log2(alignment);
3169 
3170 	iovcnt = 1;
3171 	iovs[0].iov_base = buf;
3172 	iovs[0].iov_len = 512;
3173 
3174 	rc = spdk_bdev_writev(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3175 	CU_ASSERT(rc == 0);
3176 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3177 	stub_complete_io(1);
3178 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == iovs[0].iov_base);
3179 
3180 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3181 	CU_ASSERT(rc == 0);
3182 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3183 	stub_complete_io(1);
3184 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == iovs[0].iov_base);
3185 
3186 	/* Pass unaligned iovs with no alignment required */
3187 	alignment = 1;
3188 	bdev->required_alignment = spdk_u32log2(alignment);
3189 
3190 	iovcnt = 2;
3191 	iovs[0].iov_base = buf + 16;
3192 	iovs[0].iov_len = 256;
3193 	iovs[1].iov_base = buf + 16 + 256 + 32;
3194 	iovs[1].iov_len = 256;
3195 
3196 	rc = spdk_bdev_writev(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3197 	CU_ASSERT(rc == 0);
3198 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3199 	stub_complete_io(1);
3200 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == iovs[0].iov_base);
3201 
3202 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3203 	CU_ASSERT(rc == 0);
3204 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3205 	stub_complete_io(1);
3206 	CU_ASSERT(g_bdev_io->u.bdev.iovs[0].iov_base == iovs[0].iov_base);
3207 
3208 	/* Pass unaligned iov with 2048 alignment required */
3209 	alignment = 2048;
3210 	bdev->required_alignment = spdk_u32log2(alignment);
3211 
3212 	iovcnt = 2;
3213 	iovs[0].iov_base = buf + 16;
3214 	iovs[0].iov_len = 256;
3215 	iovs[1].iov_base = buf + 16 + 256 + 32;
3216 	iovs[1].iov_len = 256;
3217 
3218 	rc = spdk_bdev_writev(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3219 	CU_ASSERT(rc == 0);
3220 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == iovcnt);
3221 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3222 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3223 				    alignment));
3224 	stub_complete_io(1);
3225 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3226 
3227 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3228 	CU_ASSERT(rc == 0);
3229 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == iovcnt);
3230 	CU_ASSERT(g_bdev_io->u.bdev.iovs == &g_bdev_io->internal.bounce_iov);
3231 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3232 				    alignment));
3233 	stub_complete_io(1);
3234 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3235 
3236 	/* Pass iov without allocated buffer without alignment required */
3237 	alignment = 1;
3238 	bdev->required_alignment = spdk_u32log2(alignment);
3239 
3240 	iovcnt = 1;
3241 	iovs[0].iov_base = NULL;
3242 	iovs[0].iov_len = 0;
3243 
3244 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3245 	CU_ASSERT(rc == 0);
3246 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3247 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3248 				    alignment));
3249 	stub_complete_io(1);
3250 
3251 	/* Pass iov without allocated buffer with 1024 alignment required */
3252 	alignment = 1024;
3253 	bdev->required_alignment = spdk_u32log2(alignment);
3254 
3255 	iovcnt = 1;
3256 	iovs[0].iov_base = NULL;
3257 	iovs[0].iov_len = 0;
3258 
3259 	rc = spdk_bdev_readv(desc, io_ch, iovs, iovcnt, 0, 512, io_done, NULL);
3260 	CU_ASSERT(rc == 0);
3261 	CU_ASSERT(g_bdev_io->internal.orig_iovcnt == 0);
3262 	CU_ASSERT(_are_iovs_aligned(g_bdev_io->u.bdev.iovs, g_bdev_io->u.bdev.iovcnt,
3263 				    alignment));
3264 	stub_complete_io(1);
3265 
3266 	spdk_put_io_channel(io_ch);
3267 	spdk_bdev_close(desc);
3268 	free_bdev(bdev);
3269 	fn_table.submit_request = stub_submit_request;
3270 	ut_fini_bdev();
3271 
3272 	free(buf);
3273 }
3274 
3275 static void
3276 bdev_io_alignment_with_boundary(void)
3277 {
3278 	struct spdk_bdev *bdev;
3279 	struct spdk_bdev_desc *desc = NULL;
3280 	struct spdk_io_channel *io_ch;
3281 	struct spdk_bdev_opts bdev_opts = {};
3282 	int rc;
3283 	void *buf = NULL;
3284 	struct iovec iovs[2];
3285 	int iovcnt;
3286 	uint64_t alignment;
3287 
3288 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
3289 	bdev_opts.bdev_io_pool_size = 20;
3290 	bdev_opts.bdev_io_cache_size = 2;
3291 	bdev_opts.opts_size = sizeof(bdev_opts);
3292 	ut_init_bdev(&bdev_opts);
3293 
3294 	fn_table.submit_request = stub_submit_request_get_buf;
3295 	bdev = allocate_bdev("bdev0");
3296 
3297 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
3298 	CU_ASSERT(rc == 0);
3299 	CU_ASSERT(desc != NULL);
3300 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3301 	io_ch = spdk_bdev_get_io_channel(desc);
3302 	CU_ASSERT(io_ch != NULL);
3303 
3304 	/* Create aligned buffer */
3305 	rc = posix_memalign(&buf, 4096, 131072);
3306 	SPDK_CU_ASSERT_FATAL(rc == 0);
3307 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3308 
3309 	/* 512 * 3 with 2 IO boundary; a small data buffer is allocated by the bdev layer */
3310 	alignment = 512;
3311 	bdev->required_alignment = spdk_u32log2(alignment);
3312 	bdev->optimal_io_boundary = 2;
3313 	bdev->split_on_optimal_io_boundary = true;
3314 
3315 	iovcnt = 1;
3316 	iovs[0].iov_base = NULL;
3317 	iovs[0].iov_len = 512 * 3;
3318 
3319 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 3, io_done, NULL);
3320 	CU_ASSERT(rc == 0);
3321 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3322 	stub_complete_io(2);
3323 
3324 	/* 8KiB with 16 IO boundary; a large data buffer is allocated by the bdev layer */
3325 	alignment = 512;
3326 	bdev->required_alignment = spdk_u32log2(alignment);
3327 	bdev->optimal_io_boundary = 16;
3328 	bdev->split_on_optimal_io_boundary = true;
3329 
3330 	iovcnt = 1;
3331 	iovs[0].iov_base = NULL;
3332 	iovs[0].iov_len = 512 * 16;
3333 
3334 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 16, io_done, NULL);
3335 	CU_ASSERT(rc == 0);
3336 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3337 	stub_complete_io(2);
3338 
3339 	/* 512 * 160 with 128 IO boundary: 63.5KiB + 16.5KiB for the two child requests */
3340 	alignment = 512;
3341 	bdev->required_alignment = spdk_u32log2(alignment);
3342 	bdev->optimal_io_boundary = 128;
3343 	bdev->split_on_optimal_io_boundary = true;
3344 
3345 	iovcnt = 1;
3346 	iovs[0].iov_base = buf + 16;
3347 	iovs[0].iov_len = 512 * 160;
3348 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 160, io_done, NULL);
3349 	CU_ASSERT(rc == 0);
3350 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3351 	stub_complete_io(2);
3352 
3353 	/* 512 * 3 with 2 IO boundary */
3354 	alignment = 512;
3355 	bdev->required_alignment = spdk_u32log2(alignment);
3356 	bdev->optimal_io_boundary = 2;
3357 	bdev->split_on_optimal_io_boundary = true;
3358 
3359 	iovcnt = 2;
3360 	iovs[0].iov_base = buf + 16;
3361 	iovs[0].iov_len = 512;
3362 	iovs[1].iov_base = buf + 16 + 512 + 32;
3363 	iovs[1].iov_len = 1024;
3364 
3365 	rc = spdk_bdev_writev_blocks(desc, io_ch, iovs, iovcnt, 1, 3, io_done, NULL);
3366 	CU_ASSERT(rc == 0);
3367 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3368 	stub_complete_io(2);
3369 
3370 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 3, io_done, NULL);
3371 	CU_ASSERT(rc == 0);
3372 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
3373 	stub_complete_io(2);
3374 
3375 	/* 512 * 64 with 32 IO boundary */
3376 	bdev->optimal_io_boundary = 32;
3377 	iovcnt = 2;
3378 	iovs[0].iov_base = buf + 16;
3379 	iovs[0].iov_len = 16384;
3380 	iovs[1].iov_base = buf + 16 + 16384 + 32;
3381 	iovs[1].iov_len = 16384;
3382 
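	/* Offset 1, 64 blocks with a 32-block boundary: expected children are
	 * [1, 31], [32, 32] and [64, 1], hence 3 outstanding I/Os.
	 */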
3383 	rc = spdk_bdev_writev_blocks(desc, io_ch, iovs, iovcnt, 1, 64, io_done, NULL);
3384 	CU_ASSERT(rc == 0);
3385 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
3386 	stub_complete_io(3);
3387 
3388 	rc = spdk_bdev_readv_blocks(desc, io_ch, iovs, iovcnt, 1, 64, io_done, NULL);
3389 	CU_ASSERT(rc == 0);
3390 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 3);
3391 	stub_complete_io(3);
3392 
3393 	/* 512 * 160 with 32 IO boundary */
3394 	iovcnt = 1;
3395 	iovs[0].iov_base = buf + 16;
3396 	iovs[0].iov_len = 16384 + 65536;
3397 
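	/* Offset 1, 160 blocks with a 32-block boundary: children [1, 31],
	 * [32, 32], [64, 32], [96, 32], [128, 32] and [160, 1], hence 6 I/Os.
	 */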
3398 	rc = spdk_bdev_writev_blocks(desc, io_ch, iovs, iovcnt, 1, 160, io_done, NULL);
3399 	CU_ASSERT(rc == 0);
3400 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 6);
3401 	stub_complete_io(6);
3402 
3403 	spdk_put_io_channel(io_ch);
3404 	spdk_bdev_close(desc);
3405 	free_bdev(bdev);
3406 	fn_table.submit_request = stub_submit_request;
3407 	ut_fini_bdev();
3408 
3409 	free(buf);
3410 }
3411 
3412 static void
3413 histogram_status_cb(void *cb_arg, int status)
3414 {
3415 	g_status = status;
3416 }
3417 
3418 static void
3419 histogram_data_cb(void *cb_arg, int status, struct spdk_histogram_data *histogram)
3420 {
3421 	g_status = status;
3422 	g_histogram = histogram;
3423 }
3424 
3425 static void
3426 histogram_io_count(void *ctx, uint64_t start, uint64_t end, uint64_t count,
3427 		   uint64_t total, uint64_t so_far)
3428 {
3429 	g_count += count;
3430 }
3431 
3432 static void
3433 histogram_channel_data_cb(void *cb_arg, int status, struct spdk_histogram_data *histogram)
3434 {
3435 	spdk_histogram_data_fn cb_fn = cb_arg;
3436 
3437 	g_status = status;
3438 
3439 	if (status == 0) {
3440 		spdk_histogram_data_iterate(histogram, cb_fn, NULL);
3441 	}
3442 }
3443 
3444 static void
3445 bdev_histograms(void)
3446 {
3447 	struct spdk_bdev *bdev;
3448 	struct spdk_bdev_desc *desc = NULL;
3449 	struct spdk_io_channel *ch;
3450 	struct spdk_histogram_data *histogram;
3451 	uint8_t buf[4096];
3452 	int rc;
3453 
3454 	ut_init_bdev(NULL);
3455 
3456 	bdev = allocate_bdev("bdev");
3457 
3458 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3459 	CU_ASSERT(rc == 0);
3460 	CU_ASSERT(desc != NULL);
3461 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3462 
3463 	ch = spdk_bdev_get_io_channel(desc);
3464 	CU_ASSERT(ch != NULL);
3465 
3466 	/* Enable histogram */
3467 	g_status = -1;
3468 	spdk_bdev_histogram_enable(bdev, histogram_status_cb, NULL, true);
3469 	poll_threads();
3470 	CU_ASSERT(g_status == 0);
3471 	CU_ASSERT(bdev->internal.histogram_enabled == true);
3472 
3473 	/* Allocate histogram */
3474 	histogram = spdk_histogram_data_alloc();
3475 	SPDK_CU_ASSERT_FATAL(histogram != NULL);
3476 
3477 	/* Check if histogram is zeroed */
3478 	spdk_bdev_histogram_get(bdev, histogram, histogram_data_cb, NULL);
3479 	poll_threads();
3480 	CU_ASSERT(g_status == 0);
3481 	SPDK_CU_ASSERT_FATAL(g_histogram != NULL);
3482 
3483 	g_count = 0;
3484 	spdk_histogram_data_iterate(g_histogram, histogram_io_count, NULL);
3485 
3486 	CU_ASSERT(g_count == 0);
3487 
3488 	rc = spdk_bdev_write_blocks(desc, ch, buf, 0, 1, io_done, NULL);
3489 	CU_ASSERT(rc == 0);
3490 
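	/* Advance the mock clock between submission and completion so the
	 * measured latency is nonzero and lands in a histogram bucket.
	 */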
3491 	spdk_delay_us(10);
3492 	stub_complete_io(1);
3493 	poll_threads();
3494 
3495 	rc = spdk_bdev_read_blocks(desc, ch, buf, 0, 1, io_done, NULL);
3496 	CU_ASSERT(rc == 0);
3497 
3498 	spdk_delay_us(10);
3499 	stub_complete_io(1);
3500 	poll_threads();
3501 
3502 	/* Check if histogram gathered data from all I/O channels */
3503 	g_histogram = NULL;
3504 	spdk_bdev_histogram_get(bdev, histogram, histogram_data_cb, NULL);
3505 	poll_threads();
3506 	CU_ASSERT(g_status == 0);
3507 	CU_ASSERT(bdev->internal.histogram_enabled == true);
3508 	SPDK_CU_ASSERT_FATAL(g_histogram != NULL);
3509 
3510 	g_count = 0;
3511 	spdk_histogram_data_iterate(g_histogram, histogram_io_count, NULL);
3512 	CU_ASSERT(g_count == 2);
3513 
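	/* Note: histogram_io_count is passed as the cb_arg here and is invoked
	 * per bucket by histogram_channel_data_cb via spdk_histogram_data_iterate().
	 */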
3514 	g_count = 0;
3515 	spdk_bdev_channel_get_histogram(ch, histogram_channel_data_cb, histogram_io_count);
3516 	CU_ASSERT(g_status == 0);
3517 	CU_ASSERT(g_count == 2);
3518 
3519 	/* Disable histogram */
3520 	spdk_bdev_histogram_enable(bdev, histogram_status_cb, NULL, false);
3521 	poll_threads();
3522 	CU_ASSERT(g_status == 0);
3523 	CU_ASSERT(bdev->internal.histogram_enabled == false);
3524 
3525 	/* Try to run histogram commands on disabled bdev */
3526 	spdk_bdev_histogram_get(bdev, histogram, histogram_data_cb, NULL);
3527 	poll_threads();
3528 	CU_ASSERT(g_status == -EFAULT);
3529 
3530 	spdk_bdev_channel_get_histogram(ch, histogram_channel_data_cb, NULL);
3531 	CU_ASSERT(g_status == -EFAULT);
3532 
3533 	spdk_histogram_data_free(histogram);
3534 	spdk_put_io_channel(ch);
3535 	spdk_bdev_close(desc);
3536 	free_bdev(bdev);
3537 	ut_fini_bdev();
3538 }
3539 
3540 static void
3541 _bdev_compare(bool emulated)
3542 {
3543 	struct spdk_bdev *bdev;
3544 	struct spdk_bdev_desc *desc = NULL;
3545 	struct spdk_io_channel *ioch;
3546 	struct ut_expected_io *expected_io;
3547 	uint64_t offset, num_blocks;
3548 	uint32_t num_completed;
3549 	char aa_buf[512];
3550 	char bb_buf[512];
3551 	struct iovec compare_iov;
3552 	uint8_t expected_io_type;
3553 	int rc;
3554 
3555 	if (emulated) {
3556 		expected_io_type = SPDK_BDEV_IO_TYPE_READ;
3557 	} else {
3558 		expected_io_type = SPDK_BDEV_IO_TYPE_COMPARE;
3559 	}
3560 
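	/* In the emulated case the bdev module advertises no COMPARE support,
	 * so the bdev layer is expected to emulate the compare by reading the
	 * blocks and comparing them in software; hence a READ is expected.
	 */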
3561 	memset(aa_buf, 0xaa, sizeof(aa_buf));
3562 	memset(bb_buf, 0xbb, sizeof(bb_buf));
3563 
3564 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = !emulated;
3565 
3566 	ut_init_bdev(NULL);
3567 	fn_table.submit_request = stub_submit_request_get_buf;
3568 	bdev = allocate_bdev("bdev");
3569 
3570 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3571 	CU_ASSERT_EQUAL(rc, 0);
3572 	SPDK_CU_ASSERT_FATAL(desc != NULL);
3573 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3574 	ioch = spdk_bdev_get_io_channel(desc);
3575 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
3576 
3577 	fn_table.submit_request = stub_submit_request_get_buf;
3578 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3579 
3580 	offset = 50;
3581 	num_blocks = 1;
3582 	compare_iov.iov_base = aa_buf;
3583 	compare_iov.iov_len = sizeof(aa_buf);
3584 
3585 	/* 1. successful compare */
3586 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3587 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3588 
3589 	g_io_done = false;
3590 	g_compare_read_buf = aa_buf;
3591 	g_compare_read_buf_len = sizeof(aa_buf);
3592 	rc = spdk_bdev_comparev_blocks(desc, ioch, &compare_iov, 1, offset, num_blocks, io_done, NULL);
3593 	CU_ASSERT_EQUAL(rc, 0);
3594 	num_completed = stub_complete_io(1);
3595 	CU_ASSERT_EQUAL(num_completed, 1);
3596 	CU_ASSERT(g_io_done == true);
3597 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3598 
3599 	/* 2. miscompare */
3600 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3601 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3602 
3603 	g_io_done = false;
3604 	g_compare_read_buf = bb_buf;
3605 	g_compare_read_buf_len = sizeof(bb_buf);
3606 	rc = spdk_bdev_comparev_blocks(desc, ioch, &compare_iov, 1, offset, num_blocks, io_done, NULL);
3607 	CU_ASSERT_EQUAL(rc, 0);
3608 	num_completed = stub_complete_io(1);
3609 	CU_ASSERT_EQUAL(num_completed, 1);
3610 	CU_ASSERT(g_io_done == true);
3611 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3612 
3613 	spdk_put_io_channel(ioch);
3614 	spdk_bdev_close(desc);
3615 	free_bdev(bdev);
3616 	fn_table.submit_request = stub_submit_request;
3617 	ut_fini_bdev();
3618 
3619 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = true;
3620 
3621 	g_compare_read_buf = NULL;
3622 }
3623 
3624 static void
3625 _bdev_compare_with_md(bool emulated)
3626 {
3627 	struct spdk_bdev *bdev;
3628 	struct spdk_bdev_desc *desc = NULL;
3629 	struct spdk_io_channel *ioch;
3630 	struct ut_expected_io *expected_io;
3631 	uint64_t offset, num_blocks;
3632 	uint32_t num_completed;
3633 	char buf[1024 + 16 /* 2 * blocklen + 2 * mdlen */];
3634 	char buf_interleaved_miscompare[1024 + 16 /* 2 * blocklen + 2 * mdlen */];
3635 	char buf_miscompare[1024 /* 2 * blocklen */];
3636 	char md_buf[16];
3637 	char md_buf_miscompare[16];
3638 	struct iovec compare_iov;
3639 	uint8_t expected_io_type;
3640 	int rc;
3641 
3642 	if (emulated) {
3643 		expected_io_type = SPDK_BDEV_IO_TYPE_READ;
3644 	} else {
3645 		expected_io_type = SPDK_BDEV_IO_TYPE_COMPARE;
3646 	}
3647 
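	/* Buffer layout assumed below: two 512-byte blocks, each followed by
	 * 8 bytes of metadata when interleaved (1024 + 16 bytes total), or
	 * 1024 data bytes plus a separate 16-byte metadata buffer otherwise.
	 */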
3648 	memset(buf, 0xaa, sizeof(buf));
3649 	memset(buf_interleaved_miscompare, 0xaa, sizeof(buf_interleaved_miscompare));
3650 	/* Make the metadata of the last block different */
3651 	memset(buf_interleaved_miscompare + 1024 + 8, 0xbb, 8);
3652 	memset(buf_miscompare, 0xbb, sizeof(buf_miscompare));
3653 	memset(md_buf, 0xaa, 16);
3654 	memset(md_buf_miscompare, 0xbb, 16);
3655 
3656 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = !emulated;
3657 
3658 	ut_init_bdev(NULL);
3659 	fn_table.submit_request = stub_submit_request_get_buf;
3660 	bdev = allocate_bdev("bdev");
3661 
3662 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3663 	CU_ASSERT_EQUAL(rc, 0);
3664 	SPDK_CU_ASSERT_FATAL(desc != NULL);
3665 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3666 	ioch = spdk_bdev_get_io_channel(desc);
3667 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
3668 
3669 	fn_table.submit_request = stub_submit_request_get_buf;
3670 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3671 
3672 	offset = 50;
3673 	num_blocks = 2;
3674 
3675 	/* interleaved md & data */
3676 	bdev->md_interleave = true;
3677 	bdev->md_len = 8;
3678 	bdev->blocklen = 512 + 8;
3679 	compare_iov.iov_base = buf;
3680 	compare_iov.iov_len = sizeof(buf);
3681 
3682 	/* 1. successful compare with md interleaved */
3683 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3684 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3685 
3686 	g_io_done = false;
3687 	g_compare_read_buf = buf;
3688 	g_compare_read_buf_len = sizeof(buf);
3689 	rc = spdk_bdev_comparev_blocks(desc, ioch, &compare_iov, 1, offset, num_blocks, io_done, NULL);
3690 	CU_ASSERT_EQUAL(rc, 0);
3691 	num_completed = stub_complete_io(1);
3692 	CU_ASSERT_EQUAL(num_completed, 1);
3693 	CU_ASSERT(g_io_done == true);
3694 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3695 
3696 	/* 2. miscompare with md interleaved */
3697 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3698 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3699 
3700 	g_io_done = false;
3701 	g_compare_read_buf = buf_interleaved_miscompare;
3702 	g_compare_read_buf_len = sizeof(buf_interleaved_miscompare);
3703 	rc = spdk_bdev_comparev_blocks(desc, ioch, &compare_iov, 1, offset, num_blocks, io_done, NULL);
3704 	CU_ASSERT_EQUAL(rc, 0);
3705 	num_completed = stub_complete_io(1);
3706 	CU_ASSERT_EQUAL(num_completed, 1);
3707 	CU_ASSERT(g_io_done == true);
3708 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3709 
3710 	/* Separate data & md buffers */
3711 	bdev->md_interleave = false;
3712 	bdev->blocklen = 512;
3713 	compare_iov.iov_base = buf;
3714 	compare_iov.iov_len = 1024;
3715 
3716 	/* 3. successful compare with md separated */
3717 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3718 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3719 
3720 	g_io_done = false;
3721 	g_compare_read_buf = buf;
3722 	g_compare_read_buf_len = 1024;
3723 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3724 	g_compare_md_buf = md_buf;
3725 	rc = spdk_bdev_comparev_blocks_with_md(desc, ioch, &compare_iov, 1, md_buf,
3726 					       offset, num_blocks, io_done, NULL);
3727 	CU_ASSERT_EQUAL(rc, 0);
3728 	num_completed = stub_complete_io(1);
3729 	CU_ASSERT_EQUAL(num_completed, 1);
3730 	CU_ASSERT(g_io_done == true);
3731 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3732 
3733 	/* 4. miscompare with md separated where md buf is different */
3734 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3735 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3736 
3737 	g_io_done = false;
3738 	g_compare_read_buf = buf;
3739 	g_compare_read_buf_len = 1024;
3740 	g_compare_md_buf = md_buf_miscompare;
3741 	rc = spdk_bdev_comparev_blocks_with_md(desc, ioch, &compare_iov, 1, md_buf,
3742 					       offset, num_blocks, io_done, NULL);
3743 	CU_ASSERT_EQUAL(rc, 0);
3744 	num_completed = stub_complete_io(1);
3745 	CU_ASSERT_EQUAL(num_completed, 1);
3746 	CU_ASSERT(g_io_done == true);
3747 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3748 
3749 	/* 5. miscompare with md separated where buf is different */
3750 	expected_io = ut_alloc_expected_io(expected_io_type, offset, num_blocks, 0);
3751 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3752 
3753 	g_io_done = false;
3754 	g_compare_read_buf = buf_miscompare;
3755 	g_compare_read_buf_len = sizeof(buf_miscompare);
3756 	g_compare_md_buf = md_buf;
3757 	rc = spdk_bdev_comparev_blocks_with_md(desc, ioch, &compare_iov, 1, md_buf,
3758 					       offset, num_blocks, io_done, NULL);
3759 	CU_ASSERT_EQUAL(rc, 0);
3760 	num_completed = stub_complete_io(1);
3761 	CU_ASSERT_EQUAL(num_completed, 1);
3762 	CU_ASSERT(g_io_done == true);
3763 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3764 
3765 	bdev->md_len = 0;
3766 	g_compare_md_buf = NULL;
3767 
3768 	spdk_put_io_channel(ioch);
3769 	spdk_bdev_close(desc);
3770 	free_bdev(bdev);
3771 	fn_table.submit_request = stub_submit_request;
3772 	ut_fini_bdev();
3773 
3774 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = true;
3775 
3776 	g_compare_read_buf = NULL;
3777 }
3778 
3779 static void
3780 bdev_compare(void)
3781 {
3782 	_bdev_compare(false);
3783 	_bdev_compare_with_md(false);
3784 }
3785 
3786 static void
3787 bdev_compare_emulated(void)
3788 {
3789 	_bdev_compare(true);
3790 	_bdev_compare_with_md(true);
3791 }
3792 
3793 static void
3794 bdev_compare_and_write(void)
3795 {
3796 	struct spdk_bdev *bdev;
3797 	struct spdk_bdev_desc *desc = NULL;
3798 	struct spdk_io_channel *ioch;
3799 	struct ut_expected_io *expected_io;
3800 	uint64_t offset, num_blocks;
3801 	uint32_t num_completed;
3802 	char aa_buf[512];
3803 	char bb_buf[512];
3804 	char cc_buf[512];
3805 	char write_buf[512];
3806 	struct iovec compare_iov;
3807 	struct iovec write_iov;
3808 	int rc;
3809 
3810 	memset(aa_buf, 0xaa, sizeof(aa_buf));
3811 	memset(bb_buf, 0xbb, sizeof(bb_buf));
3812 	memset(cc_buf, 0xcc, sizeof(cc_buf));
3813 
3814 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = false;
3815 
3816 	ut_init_bdev(NULL);
3817 	fn_table.submit_request = stub_submit_request_get_buf;
3818 	bdev = allocate_bdev("bdev");
3819 
3820 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3821 	CU_ASSERT_EQUAL(rc, 0);
3822 	SPDK_CU_ASSERT_FATAL(desc != NULL);
3823 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3824 	ioch = spdk_bdev_get_io_channel(desc);
3825 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
3826 
3827 	fn_table.submit_request = stub_submit_request_get_buf;
3828 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3829 
3830 	offset = 50;
3831 	num_blocks = 1;
3832 	compare_iov.iov_base = aa_buf;
3833 	compare_iov.iov_len = sizeof(aa_buf);
3834 	write_iov.iov_base = bb_buf;
3835 	write_iov.iov_len = sizeof(bb_buf);
3836 
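	/* COMPARE_AND_WRITE is emulated here: the bdev layer locks the LBA
	 * range, issues a READ to compare against, and on a match issues the
	 * WRITE before unlocking. That is why one READ and one WRITE are
	 * expected below.
	 */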
3837 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset, num_blocks, 0);
3838 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3839 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, offset, num_blocks, 0);
3840 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3841 
3842 	g_io_done = false;
3843 	g_compare_read_buf = aa_buf;
3844 	g_compare_read_buf_len = sizeof(aa_buf);
3845 	memset(write_buf, 0, sizeof(write_buf));
3846 	g_compare_write_buf = write_buf;
3847 	g_compare_write_buf_len = sizeof(write_buf);
3848 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ioch, &compare_iov, 1, &write_iov, 1,
3849 			offset, num_blocks, io_done, NULL);
3850 	/* Trigger range locking */
3851 	poll_threads();
3852 	CU_ASSERT_EQUAL(rc, 0);
3853 	num_completed = stub_complete_io(1);
3854 	CU_ASSERT_EQUAL(num_completed, 1);
3855 	CU_ASSERT(g_io_done == false);
3856 	num_completed = stub_complete_io(1);
3857 	/* Trigger range unlocking */
3858 	poll_threads();
3859 	CU_ASSERT_EQUAL(num_completed, 1);
3860 	CU_ASSERT(g_io_done == true);
3861 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
3862 	CU_ASSERT(memcmp(write_buf, bb_buf, sizeof(write_buf)) == 0);
3863 
3864 	/* Test miscompare */
3865 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, offset, num_blocks, 0);
3866 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3867 
3868 	g_io_done = false;
3869 	g_compare_read_buf = cc_buf;
3870 	g_compare_read_buf_len = sizeof(cc_buf);
3871 	memset(write_buf, 0, sizeof(write_buf));
3872 	g_compare_write_buf = write_buf;
3873 	g_compare_write_buf_len = sizeof(write_buf);
3874 	rc = spdk_bdev_comparev_and_writev_blocks(desc, ioch, &compare_iov, 1, &write_iov, 1,
3875 			offset, num_blocks, io_done, NULL);
3876 	/* Trigger range locking */
3877 	poll_threads();
3878 	CU_ASSERT_EQUAL(rc, 0);
3879 	num_completed = stub_complete_io(1);
3880 	/* Trigger range unlocking earlier because we expect error here */
3881 	poll_threads();
3882 	CU_ASSERT_EQUAL(num_completed, 1);
3883 	CU_ASSERT(g_io_done == true);
3884 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_MISCOMPARE);
3885 	num_completed = stub_complete_io(1);
3886 	CU_ASSERT_EQUAL(num_completed, 0);
3887 
3888 	spdk_put_io_channel(ioch);
3889 	spdk_bdev_close(desc);
3890 	free_bdev(bdev);
3891 	fn_table.submit_request = stub_submit_request;
3892 	ut_fini_bdev();
3893 
3894 	g_io_types_supported[SPDK_BDEV_IO_TYPE_COMPARE] = true;
3895 
3896 	g_compare_read_buf = NULL;
3897 	g_compare_write_buf = NULL;
3898 }
3899 
3900 static void
3901 bdev_write_zeroes(void)
3902 {
3903 	struct spdk_bdev *bdev;
3904 	struct spdk_bdev_desc *desc = NULL;
3905 	struct spdk_io_channel *ioch;
3906 	struct ut_expected_io *expected_io;
3907 	uint64_t offset, num_io_blocks, num_blocks;
3908 	uint32_t num_completed, num_requests;
3909 	int rc;
3910 
3911 	ut_init_bdev(NULL);
3912 	bdev = allocate_bdev("bdev");
3913 
3914 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
3915 	CU_ASSERT_EQUAL(rc, 0);
3916 	SPDK_CU_ASSERT_FATAL(desc != NULL);
3917 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
3918 	ioch = spdk_bdev_get_io_channel(desc);
3919 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
3920 
3921 	fn_table.submit_request = stub_submit_request;
3922 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
3923 
3924 	/* First test that if the bdev supports write_zeroes, the request won't be split */
3925 	bdev->md_len = 0;
3926 	bdev->blocklen = 4096;
3927 	num_blocks = (ZERO_BUFFER_SIZE / bdev->blocklen) * 2;
3928 
3929 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 0, num_blocks, 0);
3930 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3931 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
3932 	CU_ASSERT_EQUAL(rc, 0);
3933 	num_completed = stub_complete_io(1);
3934 	CU_ASSERT_EQUAL(num_completed, 1);
3935 
3936 	/* Check that if write zeroes is not supported it'll be replaced by regular writes */
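	/* The bdev layer is expected to write zeroes from an internal buffer of
	 * ZERO_BUFFER_SIZE bytes, so this 2 * ZERO_BUFFER_SIZE request should
	 * split into two full-sized WRITE requests.
	 */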
3937 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, false);
3938 	num_io_blocks = ZERO_BUFFER_SIZE / bdev->blocklen;
3939 	num_requests = 2;
3940 	num_blocks = (ZERO_BUFFER_SIZE / bdev->blocklen) * num_requests;
3941 
3942 	for (offset = 0; offset < num_requests; ++offset) {
3943 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE,
3944 						   offset * num_io_blocks, num_io_blocks, 0);
3945 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3946 	}
3947 
3948 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
3949 	CU_ASSERT_EQUAL(rc, 0);
3950 	num_completed = stub_complete_io(num_requests);
3951 	CU_ASSERT_EQUAL(num_completed, num_requests);
3952 
3953 	/* Check that the splitting is correct if bdev has interleaved metadata */
3954 	bdev->md_interleave = true;
3955 	bdev->md_len = 64;
3956 	bdev->blocklen = 4096 + 64;
3957 	num_blocks = (ZERO_BUFFER_SIZE / bdev->blocklen) * 2;
3958 
3959 	num_requests = offset = 0;
3960 	while (offset < num_blocks) {
3961 		num_io_blocks = spdk_min(ZERO_BUFFER_SIZE / bdev->blocklen, num_blocks - offset);
3962 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE,
3963 						   offset, num_io_blocks, 0);
3964 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3965 		offset += num_io_blocks;
3966 		num_requests++;
3967 	}
3968 
3969 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
3970 	CU_ASSERT_EQUAL(rc, 0);
3971 	num_completed = stub_complete_io(num_requests);
3972 	CU_ASSERT_EQUAL(num_completed, num_requests);
3973 	num_completed = stub_complete_io(num_requests);
3974 	CU_ASSERT(num_completed == 0);
3975 
3976 	/* Check the same for a separate metadata buffer */
3977 	bdev->md_interleave = false;
3978 	bdev->md_len = 64;
3979 	bdev->blocklen = 4096;
3980 
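	/* With a separate metadata buffer, each emulated request must fit both
	 * data and metadata into the zero buffer; the expected md pointer sits
	 * immediately after the data region of the shared zero buffer.
	 */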
3981 	num_requests = offset = 0;
3982 	while (offset < num_blocks) {
3983 		num_io_blocks = spdk_min(ZERO_BUFFER_SIZE / (bdev->blocklen + bdev->md_len), num_blocks);
3984 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE,
3985 						   offset, num_io_blocks, 0);
3986 		expected_io->md_buf = (char *)g_bdev_mgr.zero_buffer + num_io_blocks * bdev->blocklen;
3987 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
3988 		offset += num_io_blocks;
3989 		num_requests++;
3990 	}
3991 
3992 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
3993 	CU_ASSERT_EQUAL(rc, 0);
3994 	num_completed = stub_complete_io(num_requests);
3995 	CU_ASSERT_EQUAL(num_completed, num_requests);
3996 
3997 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, true);
3998 	spdk_put_io_channel(ioch);
3999 	spdk_bdev_close(desc);
4000 	free_bdev(bdev);
4001 	ut_fini_bdev();
4002 }
4003 
4004 static void
4005 bdev_zcopy_write(void)
4006 {
4007 	struct spdk_bdev *bdev;
4008 	struct spdk_bdev_desc *desc = NULL;
4009 	struct spdk_io_channel *ioch;
4010 	struct ut_expected_io *expected_io;
4011 	uint64_t offset, num_blocks;
4012 	uint32_t num_completed;
4013 	char aa_buf[512];
4014 	struct iovec iov;
4015 	int rc;
4016 	const bool populate = false;
4017 	const bool commit = true;
4018 
4019 	memset(aa_buf, 0xaa, sizeof(aa_buf));
4020 
4021 	ut_init_bdev(NULL);
4022 	bdev = allocate_bdev("bdev");
4023 
4024 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
4025 	CU_ASSERT_EQUAL(rc, 0);
4026 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4027 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4028 	ioch = spdk_bdev_get_io_channel(desc);
4029 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
4030 
4031 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
4032 
4033 	offset = 50;
4034 	num_blocks = 1;
4035 	iov.iov_base = NULL;
4036 	iov.iov_len = 0;
4037 
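	/* Zcopy write protocol: zcopy_start with populate=false asks the bdev
	 * for a buffer to write into; zcopy_end with commit=true then commits
	 * the data. The read-buffer globals are set to sentinel values so we
	 * can verify the write path never touches them.
	 */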
4038 	g_zcopy_read_buf = (void *) 0x1122334455667788UL;
4039 	g_zcopy_read_buf_len = (uint32_t) -1;
4040 	/* Do a zcopy start for a write (populate=false) */
4041 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0);
4042 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
4043 	g_io_done = false;
4044 	g_zcopy_write_buf = aa_buf;
4045 	g_zcopy_write_buf_len = sizeof(aa_buf);
4046 	g_zcopy_bdev_io = NULL;
4047 	rc = spdk_bdev_zcopy_start(desc, ioch, &iov, 1, offset, num_blocks, populate, io_done, NULL);
4048 	CU_ASSERT_EQUAL(rc, 0);
4049 	num_completed = stub_complete_io(1);
4050 	CU_ASSERT_EQUAL(num_completed, 1);
4051 	CU_ASSERT(g_io_done == true);
4052 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
4053 	/* Check that the iov has been set up */
4054 	CU_ASSERT(iov.iov_base == g_zcopy_write_buf);
4055 	CU_ASSERT(iov.iov_len == g_zcopy_write_buf_len);
4056 	/* Check that the bdev_io has been saved */
4057 	CU_ASSERT(g_zcopy_bdev_io != NULL);
4058 	/* Now do the zcopy end for a write (commit=true) */
4059 	g_io_done = false;
4060 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0);
4061 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
4062 	rc = spdk_bdev_zcopy_end(g_zcopy_bdev_io, commit, io_done, NULL);
4063 	CU_ASSERT_EQUAL(rc, 0);
4064 	num_completed = stub_complete_io(1);
4065 	CU_ASSERT_EQUAL(num_completed, 1);
4066 	CU_ASSERT(g_io_done == true);
4067 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
4068 	/* Check that the g_zcopy fields are reset by io_done */
4069 	CU_ASSERT(g_zcopy_write_buf == NULL);
4070 	CU_ASSERT(g_zcopy_write_buf_len == 0);
4071 	/* Check that io_done has freed the g_zcopy_bdev_io */
4072 	CU_ASSERT(g_zcopy_bdev_io == NULL);
4073 
4074 	/* Check that the zcopy read buffer has not been touched, which
4075 	 * ensures that the correct buffers were used.
4076 	 */
4077 	CU_ASSERT(g_zcopy_read_buf == (void *) 0x1122334455667788UL);
4078 	CU_ASSERT(g_zcopy_read_buf_len == (uint32_t) -1);
4079 
4080 	spdk_put_io_channel(ioch);
4081 	spdk_bdev_close(desc);
4082 	free_bdev(bdev);
4083 	ut_fini_bdev();
4084 }
4085 
4086 static void
4087 bdev_zcopy_read(void)
4088 {
4089 	struct spdk_bdev *bdev;
4090 	struct spdk_bdev_desc *desc = NULL;
4091 	struct spdk_io_channel *ioch;
4092 	struct ut_expected_io *expected_io;
4093 	uint64_t offset, num_blocks;
4094 	uint32_t num_completed;
4095 	char aa_buf[512];
4096 	struct iovec iov;
4097 	int rc;
4098 	const bool populate = true;
4099 	const bool commit = false;
4100 
4101 	memset(aa_buf, 0xaa, sizeof(aa_buf));
4102 
4103 	ut_init_bdev(NULL);
4104 	bdev = allocate_bdev("bdev");
4105 
4106 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
4107 	CU_ASSERT_EQUAL(rc, 0);
4108 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4109 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4110 	ioch = spdk_bdev_get_io_channel(desc);
4111 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
4112 
4113 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
4114 
4115 	offset = 50;
4116 	num_blocks = 1;
4117 	iov.iov_base = NULL;
4118 	iov.iov_len = 0;
4119 
4120 	g_zcopy_write_buf = (void *) 0x1122334455667788UL;
4121 	g_zcopy_write_buf_len = (uint32_t) -1;
4122 
4123 	/* Do a zcopy start for a read (populate=true) */
4124 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0);
4125 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
4126 	g_io_done = false;
4127 	g_zcopy_read_buf = aa_buf;
4128 	g_zcopy_read_buf_len = sizeof(aa_buf);
4129 	g_zcopy_bdev_io = NULL;
4130 	rc = spdk_bdev_zcopy_start(desc, ioch, &iov, 1, offset, num_blocks, populate, io_done, NULL);
4131 	CU_ASSERT_EQUAL(rc, 0);
4132 	num_completed = stub_complete_io(1);
4133 	CU_ASSERT_EQUAL(num_completed, 1);
4134 	CU_ASSERT(g_io_done == true);
4135 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
4136 	/* Check that the iov has been set up */
4137 	CU_ASSERT(iov.iov_base == g_zcopy_read_buf);
4138 	CU_ASSERT(iov.iov_len == g_zcopy_read_buf_len);
4139 	/* Check that the bdev_io has been saved */
4140 	CU_ASSERT(g_zcopy_bdev_io != NULL);
4141 
4142 	/* Now do the zcopy end for a read (commit=false) */
4143 	g_io_done = false;
4144 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_ZCOPY, offset, num_blocks, 0);
4145 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
4146 	rc = spdk_bdev_zcopy_end(g_zcopy_bdev_io, commit, io_done, NULL);
4147 	CU_ASSERT_EQUAL(rc, 0);
4148 	num_completed = stub_complete_io(1);
4149 	CU_ASSERT_EQUAL(num_completed, 1);
4150 	CU_ASSERT(g_io_done == true);
4151 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
4152 	/* Check that the g_zcopy fields are reset by io_done */
4153 	CU_ASSERT(g_zcopy_read_buf == NULL);
4154 	CU_ASSERT(g_zcopy_read_buf_len == 0);
4155 	/* Check that io_done has freed the g_zcopy_bdev_io */
4156 	CU_ASSERT(g_zcopy_bdev_io == NULL);
4157 
4158 	/* Check that the zcopy write buffer has not been touched, which
4159 	 * ensures that the correct buffers were used.
4160 	 */
4161 	CU_ASSERT(g_zcopy_write_buf == (void *) 0x1122334455667788UL);
4162 	CU_ASSERT(g_zcopy_write_buf_len == (uint32_t) -1);
4163 
4164 	spdk_put_io_channel(ioch);
4165 	spdk_bdev_close(desc);
4166 	free_bdev(bdev);
4167 	ut_fini_bdev();
4168 }
4169 
4170 static void
4171 bdev_open_while_hotremove(void)
4172 {
4173 	struct spdk_bdev *bdev;
4174 	struct spdk_bdev_desc *desc[2] = {};
4175 	int rc;
4176 
4177 	bdev = allocate_bdev("bdev");
4178 
4179 	rc = spdk_bdev_open_ext("bdev", false, bdev_ut_event_cb, NULL, &desc[0]);
4180 	CU_ASSERT(rc == 0);
4181 	SPDK_CU_ASSERT_FATAL(desc[0] != NULL);
4182 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc[0]));
4183 
4184 	spdk_bdev_unregister(bdev, NULL, NULL);
4185 	/* Bdev unregister is handled asynchronously. Poll the thread to let it complete. */
4186 	poll_threads();
4187 
4188 	rc = spdk_bdev_open_ext("bdev", false, bdev_ut_event_cb, NULL, &desc[1]);
4189 	CU_ASSERT(rc == -ENODEV);
4190 	SPDK_CU_ASSERT_FATAL(desc[1] == NULL);
4191 
4192 	spdk_bdev_close(desc[0]);
4193 	free_bdev(bdev);
4194 }
4195 
4196 static void
4197 bdev_close_while_hotremove(void)
4198 {
4199 	struct spdk_bdev *bdev;
4200 	struct spdk_bdev_desc *desc = NULL;
4201 	int rc = 0;
4202 
4203 	bdev = allocate_bdev("bdev");
4204 
4205 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc, &desc);
4206 	CU_ASSERT_EQUAL(rc, 0);
4207 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4208 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4209 
4210 	/* Simulate hot-unplug by unregistering bdev */
4211 	g_event_type1 = 0xFF;
4212 	g_unregister_arg = NULL;
4213 	g_unregister_rc = -1;
4214 	spdk_bdev_unregister(bdev, bdev_unregister_cb, (void *)0x12345678);
4215 	/* Close device while remove event is in flight */
4216 	spdk_bdev_close(desc);
4217 
4218 	/* Ensure that unregister callback is delayed */
4219 	CU_ASSERT_EQUAL(g_unregister_arg, NULL);
4220 	CU_ASSERT_EQUAL(g_unregister_rc, -1);
4221 
4222 	poll_threads();
4223 
4224 	/* Event callback shall not be issued because device was closed */
4225 	CU_ASSERT_EQUAL(g_event_type1, 0xFF);
4226 	/* Unregister callback is issued */
4227 	CU_ASSERT_EQUAL(g_unregister_arg, (void *)0x12345678);
4228 	CU_ASSERT_EQUAL(g_unregister_rc, 0);
4229 
4230 	free_bdev(bdev);
4231 }
4232 
4233 static void
4234 bdev_open_ext(void)
4235 {
4236 	struct spdk_bdev *bdev;
4237 	struct spdk_bdev_desc *desc1 = NULL;
4238 	struct spdk_bdev_desc *desc2 = NULL;
4239 	int rc = 0;
4240 
4241 	bdev = allocate_bdev("bdev");
4242 
4243 	rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1);
4244 	CU_ASSERT_EQUAL(rc, -EINVAL);
4245 
4246 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1);
4247 	CU_ASSERT_EQUAL(rc, 0);
4248 
4249 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2);
4250 	CU_ASSERT_EQUAL(rc, 0);
4251 
4252 	g_event_type1 = 0xFF;
4253 	g_event_type2 = 0xFF;
4254 
4255 	/* Simulate hot-unplug by unregistering bdev */
4256 	spdk_bdev_unregister(bdev, NULL, NULL);
4257 	poll_threads();
4258 
4259 	/* Check if correct events have been triggered in event callback fn */
4260 	CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE);
4261 	CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE);
4262 
4263 	free_bdev(bdev);
4264 	poll_threads();
4265 }
4266 
4267 static void
4268 bdev_open_ext_unregister(void)
4269 {
4270 	struct spdk_bdev *bdev;
4271 	struct spdk_bdev_desc *desc1 = NULL;
4272 	struct spdk_bdev_desc *desc2 = NULL;
4273 	struct spdk_bdev_desc *desc3 = NULL;
4274 	struct spdk_bdev_desc *desc4 = NULL;
4275 	int rc = 0;
4276 
4277 	bdev = allocate_bdev("bdev");
4278 
4279 	rc = spdk_bdev_open_ext("bdev", true, NULL, NULL, &desc1);
4280 	CU_ASSERT_EQUAL(rc, -EINVAL);
4281 
4282 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb1, &desc1, &desc1);
4283 	CU_ASSERT_EQUAL(rc, 0);
4284 
4285 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb2, &desc2, &desc2);
4286 	CU_ASSERT_EQUAL(rc, 0);
4287 
4288 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb3, &desc3, &desc3);
4289 	CU_ASSERT_EQUAL(rc, 0);
4290 
4291 	rc = spdk_bdev_open_ext("bdev", true, bdev_open_cb4, &desc4, &desc4);
4292 	CU_ASSERT_EQUAL(rc, 0);
4293 
4294 	g_event_type1 = 0xFF;
4295 	g_event_type2 = 0xFF;
4296 	g_event_type3 = 0xFF;
4297 	g_event_type4 = 0xFF;
4298 
4299 	g_unregister_arg = NULL;
4300 	g_unregister_rc = -1;
4301 
4302 	/* Simulate hot-unplug by unregistering bdev */
4303 	spdk_bdev_unregister(bdev, bdev_unregister_cb, (void *)0x12345678);
4304 
4305 	/*
4306 	 * Unregister is handled asynchronously and the event callbacks
4307 	 * (i.e., the bdev_open_cbN above) will be called.
4308 	 * bdev_open_cb3 and bdev_open_cb4 intentionally do not close
4309 	 * desc3 and desc4, so the unregister cannot complete yet.
4310 	 */
4311 	poll_threads();
4312 
4313 	/* Check if correct events have been triggered in event callback fn */
4314 	CU_ASSERT_EQUAL(g_event_type1, SPDK_BDEV_EVENT_REMOVE);
4315 	CU_ASSERT_EQUAL(g_event_type2, SPDK_BDEV_EVENT_REMOVE);
4316 	CU_ASSERT_EQUAL(g_event_type3, SPDK_BDEV_EVENT_REMOVE);
4317 	CU_ASSERT_EQUAL(g_event_type4, SPDK_BDEV_EVENT_REMOVE);
4318 
4319 	/* Check that unregister callback is delayed */
4320 	CU_ASSERT(g_unregister_arg == NULL);
4321 	CU_ASSERT(g_unregister_rc == -1);
4322 
4323 	/*
4324 	 * Explicitly close desc3. As desc4 is still open, the
4325 	 * unregister callback remains delayed.
4326 	 */
4327 	spdk_bdev_close(desc3);
4328 	CU_ASSERT(g_unregister_arg == NULL);
4329 	CU_ASSERT(g_unregister_rc == -1);
4330 
4331 	/*
4332 	 * Explicitly close desc4; once the last desc is closed, the
4333 	 * pending bdev unregister operation can proceed.
4334 	 */
4335 	spdk_bdev_close(desc4);
4336 
4337 	/* Poll the thread for the async unregister operation */
4338 	poll_threads();
4339 
4340 	/* Check that unregister callback is executed */
4341 	CU_ASSERT(g_unregister_arg == (void *)0x12345678);
4342 	CU_ASSERT(g_unregister_rc == 0);
4343 
4344 	free_bdev(bdev);
4345 	poll_threads();
4346 }
4347 
4348 struct timeout_io_cb_arg {
4349 	struct iovec iov;
4350 	uint8_t type;
4351 };
4352 
4353 static int
4354 bdev_channel_count_submitted_io(struct spdk_bdev_channel *ch)
4355 {
4356 	struct spdk_bdev_io *bdev_io;
4357 	int n = 0;
4358 
4359 	if (!ch) {
4360 		return -1;
4361 	}
4362 
4363 	TAILQ_FOREACH(bdev_io, &ch->io_submitted, internal.ch_link) {
4364 		n++;
4365 	}
4366 
4367 	return n;
4368 }
4369 
4370 static void
4371 bdev_channel_io_timeout_cb(void *cb_arg, struct spdk_bdev_io *bdev_io)
4372 {
4373 	struct timeout_io_cb_arg *ctx = cb_arg;
4374 
4375 	ctx->type = bdev_io->type;
4376 	ctx->iov.iov_base = bdev_io->iov.iov_base;
4377 	ctx->iov.iov_len = bdev_io->iov.iov_len;
4378 }
4379 
4380 static void
4381 bdev_set_io_timeout(void)
4382 {
4383 	struct spdk_bdev *bdev;
4384 	struct spdk_bdev_desc *desc = NULL;
4385 	struct spdk_io_channel *io_ch = NULL;
4386 	struct spdk_bdev_channel *bdev_ch = NULL;
4387 	struct timeout_io_cb_arg cb_arg;
4388 
4389 	ut_init_bdev(NULL);
4390 	bdev = allocate_bdev("bdev");
4391 
4392 	CU_ASSERT(spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc) == 0);
4393 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4394 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4395 
4396 	io_ch = spdk_bdev_get_io_channel(desc);
4397 	CU_ASSERT(io_ch != NULL);
4398 
4399 	bdev_ch = spdk_io_channel_get_ctx(io_ch);
4400 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
4401 
4402 	/* This is part 1.
4403 	 * We will check the bdev_ch->io_submitted list
4404 	 * to make sure that it links the user-submitted I/Os.
4405 	 */
4406 	CU_ASSERT(spdk_bdev_read(desc, io_ch, (void *)0x1000, 0, 4096, io_done, NULL) == 0);
4407 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4408 	CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
4409 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 2);
4410 	stub_complete_io(1);
4411 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4412 	stub_complete_io(1);
4413 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);
4414 
4415 	/* Split IO */
4416 	bdev->optimal_io_boundary = 16;
4417 	bdev->split_on_optimal_io_boundary = true;
4418 
4419 	/* Now test that a single-vector command is split correctly.
4420 	 * Offset 14, length 8, payload 0xF000
4421 	 *  Child - Offset 14, length 2, payload 0xF000
4422 	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
4423 	 *
4424 	 * Set up the expected values before calling spdk_bdev_read_blocks
4425 	 */
4426 	CU_ASSERT(spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, NULL) == 0);
4427 	/* We count all submitted I/Os, including those generated by splitting. */
4428 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 3);
4429 	stub_complete_io(1);
4430 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 2);
4431 	stub_complete_io(1);
4432 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);
4433 
4434 	/* Also include the reset IO */
4435 	CU_ASSERT(spdk_bdev_reset(desc, io_ch, io_done, NULL) == 0);
4436 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4437 	poll_threads();
4438 	stub_complete_io(1);
4439 	poll_threads();
4440 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);
4441 
4442 	/* This is part 2.
4443 	 * Test registering the desc timeout poller.
4444 	 */
4445 
4446 	/* Successfully set the timeout */
4447 	CU_ASSERT(spdk_bdev_set_timeout(desc, 30, bdev_channel_io_timeout_cb, &cb_arg) == 0);
4448 	CU_ASSERT(desc->io_timeout_poller != NULL);
4449 	CU_ASSERT(desc->timeout_in_sec == 30);
4450 	CU_ASSERT(desc->cb_fn == bdev_channel_io_timeout_cb);
4451 	CU_ASSERT(desc->cb_arg == &cb_arg);
4452 
4453 	/* Change the timeout limit */
4454 	CU_ASSERT(spdk_bdev_set_timeout(desc, 20, bdev_channel_io_timeout_cb, &cb_arg) == 0);
4455 	CU_ASSERT(desc->io_timeout_poller != NULL);
4456 	CU_ASSERT(desc->timeout_in_sec == 20);
4457 	CU_ASSERT(desc->cb_fn == bdev_channel_io_timeout_cb);
4458 	CU_ASSERT(desc->cb_arg == &cb_arg);
4459 
4460 	/* Disable the timeout */
4461 	CU_ASSERT(spdk_bdev_set_timeout(desc, 0, NULL, NULL) == 0);
4462 	CU_ASSERT(desc->io_timeout_poller == NULL);
4463 
4464 	/* This is part 3.
4465 	 * We will catch a timed-out I/O and check whether it is
4466 	 * the one that was submitted.
4467 	 */
4468 	memset(&cb_arg, 0, sizeof(cb_arg));
4469 	CU_ASSERT(spdk_bdev_set_timeout(desc, 30, bdev_channel_io_timeout_cb, &cb_arg) == 0);
4470 	CU_ASSERT(spdk_bdev_write_blocks(desc, io_ch, (void *)0x1000, 0, 1, io_done, NULL) == 0);
4471 
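	/* spdk_delay_us() advances the mock clock. Multiplying by
	 * spdk_get_ticks_hz() advances it by 15 seconds' worth of ticks
	 * (the unit test environment presumably stubs the tick rate so that
	 * ticks and microseconds line up).
	 */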
4472 	/* Don't reach the limit */
4473 	spdk_delay_us(15 * spdk_get_ticks_hz());
4474 	poll_threads();
4475 	CU_ASSERT(cb_arg.type == 0);
4476 	CU_ASSERT(cb_arg.iov.iov_base == (void *)0x0);
4477 	CU_ASSERT(cb_arg.iov.iov_len == 0);
4478 
4479 	/* 15 + 15 = 30 seconds, reaching the limit */
4480 	spdk_delay_us(15 * spdk_get_ticks_hz());
4481 	poll_threads();
4482 	CU_ASSERT(cb_arg.type == SPDK_BDEV_IO_TYPE_WRITE);
4483 	CU_ASSERT(cb_arg.iov.iov_base == (void *)0x1000);
4484 	CU_ASSERT(cb_arg.iov.iov_len == 1 * bdev->blocklen);
4485 	stub_complete_io(1);
4486 
4487 	/* Use the same split IO above and check the IO */
4488 	memset(&cb_arg, 0, sizeof(cb_arg));
4489 	CU_ASSERT(spdk_bdev_write_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, NULL) == 0);
4490 
	/* The first child completes in time */
4492 	spdk_delay_us(15 * spdk_get_ticks_hz());
4493 	poll_threads();
4494 	stub_complete_io(1);
4495 	CU_ASSERT(cb_arg.type == 0);
4496 	CU_ASSERT(cb_arg.iov.iov_base == (void *)0x0);
4497 	CU_ASSERT(cb_arg.iov.iov_len == 0);
4498 
	/* The second child reaches the limit */
4500 	spdk_delay_us(15 * spdk_get_ticks_hz());
4501 	poll_threads();
4502 	CU_ASSERT(cb_arg.type == SPDK_BDEV_IO_TYPE_WRITE);
4503 	CU_ASSERT(cb_arg.iov.iov_base == (void *)0xF000);
4504 	CU_ASSERT(cb_arg.iov.iov_len == 8 * bdev->blocklen);
4505 	stub_complete_io(1);
4506 
4507 	/* Also include the reset IO */
4508 	memset(&cb_arg, 0, sizeof(cb_arg));
4509 	CU_ASSERT(spdk_bdev_reset(desc, io_ch, io_done, NULL) == 0);
4510 	spdk_delay_us(30 * spdk_get_ticks_hz());
4511 	poll_threads();
4512 	CU_ASSERT(cb_arg.type == SPDK_BDEV_IO_TYPE_RESET);
4513 	stub_complete_io(1);
4514 	poll_threads();
4515 
4516 	spdk_put_io_channel(io_ch);
4517 	spdk_bdev_close(desc);
4518 	free_bdev(bdev);
4519 	ut_fini_bdev();
4520 }
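
/*
 * Illustrative sketch (not part of the test suite): how a bdev user would
 * typically arm the per-descriptor I/O timeout exercised above.  The handler
 * below and its logging policy are hypothetical; the callback signature is
 * spdk_bdev_io_timeout_cb from spdk/bdev.h.
 */
static void __attribute__((unused))
example_io_timeout_cb(void *cb_arg, struct spdk_bdev_io *bdev_io)
{
	/* A real application might abort, retry, or account the stuck I/O here. */
	SPDK_NOTICELOG("I/O of type %d timed out\n", bdev_io->type);
}

static void __attribute__((unused))
example_arm_io_timeout(struct spdk_bdev_desc *desc)
{
	/* Invoke example_io_timeout_cb for any I/O outstanding longer than 30s. */
	spdk_bdev_set_timeout(desc, 30, example_io_timeout_cb, NULL);
}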
4521 
4522 static void
4523 bdev_set_qd_sampling(void)
4524 {
4525 	struct spdk_bdev *bdev;
4526 	struct spdk_bdev_desc *desc = NULL;
4527 	struct spdk_io_channel *io_ch = NULL;
4528 	struct spdk_bdev_channel *bdev_ch = NULL;
4529 	struct timeout_io_cb_arg cb_arg;
4530 
4531 	ut_init_bdev(NULL);
4532 	bdev = allocate_bdev("bdev");
4533 
4534 	CU_ASSERT(spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc) == 0);
4535 	SPDK_CU_ASSERT_FATAL(desc != NULL);
4536 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4537 
4538 	io_ch = spdk_bdev_get_io_channel(desc);
4539 	CU_ASSERT(io_ch != NULL);
4540 
4541 	bdev_ch = spdk_io_channel_get_ctx(io_ch);
4542 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
4543 
	/* This is part 1.
	 * We will check the bdev_ch->io_submitted list
	 * to make sure that it links only the user-submitted I/Os.
	 */
4548 	CU_ASSERT(spdk_bdev_read(desc, io_ch, (void *)0x1000, 0, 4096, io_done, NULL) == 0);
4549 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4550 	CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
4551 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 2);
4552 	stub_complete_io(1);
4553 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4554 	stub_complete_io(1);
4555 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);
4556 
	/* This is part 2.
	 * Test the bdev's QD poller registration.
	 */
4560 	/* 1st Successfully set the qd sampling period */
4561 	spdk_bdev_set_qd_sampling_period(bdev, 10);
4562 	CU_ASSERT(bdev->internal.new_period == 10);
4563 	CU_ASSERT(bdev->internal.period == 10);
4564 	CU_ASSERT(bdev->internal.qd_desc != NULL);
4565 	poll_threads();
4566 	CU_ASSERT(bdev->internal.qd_poller != NULL);
4567 
4568 	/* 2nd Change the qd sampling period */
4569 	spdk_bdev_set_qd_sampling_period(bdev, 20);
4570 	CU_ASSERT(bdev->internal.new_period == 20);
4571 	CU_ASSERT(bdev->internal.period == 10);
4572 	CU_ASSERT(bdev->internal.qd_desc != NULL);
4573 	poll_threads();
4574 	CU_ASSERT(bdev->internal.qd_poller != NULL);
4575 	CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
4576 
4577 	/* 3rd Change the qd sampling period and verify qd_poll_in_progress */
4578 	spdk_delay_us(20);
4579 	poll_thread_times(0, 1);
4580 	CU_ASSERT(bdev->internal.qd_poll_in_progress == true);
4581 	spdk_bdev_set_qd_sampling_period(bdev, 30);
4582 	CU_ASSERT(bdev->internal.new_period == 30);
4583 	CU_ASSERT(bdev->internal.period == 20);
4584 	poll_threads();
4585 	CU_ASSERT(bdev->internal.qd_poll_in_progress == false);
4586 	CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
4587 
4588 	/* 4th Disable the qd sampling period */
4589 	spdk_bdev_set_qd_sampling_period(bdev, 0);
4590 	CU_ASSERT(bdev->internal.new_period == 0);
4591 	CU_ASSERT(bdev->internal.period == 30);
4592 	poll_threads();
4593 	CU_ASSERT(bdev->internal.qd_poller == NULL);
4594 	CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
4595 	CU_ASSERT(bdev->internal.qd_desc == NULL);
4596 
	/* This is part 3.
	 * We will test that submitted I/O and resets work
	 * properly with QD sampling.
	 */
4601 	memset(&cb_arg, 0, sizeof(cb_arg));
4602 	spdk_bdev_set_qd_sampling_period(bdev, 1);
4603 	poll_threads();
4604 
4605 	CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
4606 	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
4607 
4608 	/* Also include the reset IO */
4609 	memset(&cb_arg, 0, sizeof(cb_arg));
4610 	CU_ASSERT(spdk_bdev_reset(desc, io_ch, io_done, NULL) == 0);
4611 	poll_threads();
4612 
4613 	/* Close the desc */
4614 	spdk_put_io_channel(io_ch);
4615 	spdk_bdev_close(desc);
4616 
4617 	/* Complete the submitted IO and reset */
4618 	stub_complete_io(2);
4619 	poll_threads();
4620 
4621 	free_bdev(bdev);
4622 	ut_fini_bdev();
4623 }
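
/*
 * Illustrative sketch (unused by the suite): once a sampling period is armed
 * with spdk_bdev_set_qd_sampling_period(), the measured queue depth can be
 * read back with spdk_bdev_get_qd().  The 1000 us period is an arbitrary
 * example value.
 */
static void __attribute__((unused))
example_report_queue_depth(struct spdk_bdev *bdev)
{
	spdk_bdev_set_qd_sampling_period(bdev, 1000);
	SPDK_NOTICELOG("bdev %s measured qd: %" PRIu64 "\n",
		       spdk_bdev_get_name(bdev), spdk_bdev_get_qd(bdev));
}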
4624 
4625 static void
4626 lba_range_overlap(void)
4627 {
4628 	struct lba_range r1, r2;
4629 
4630 	r1.offset = 100;
4631 	r1.length = 50;
4632 
4633 	r2.offset = 0;
4634 	r2.length = 1;
4635 	CU_ASSERT(!bdev_lba_range_overlapped(&r1, &r2));
4636 
4637 	r2.offset = 0;
4638 	r2.length = 100;
4639 	CU_ASSERT(!bdev_lba_range_overlapped(&r1, &r2));
4640 
4641 	r2.offset = 0;
4642 	r2.length = 110;
4643 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4644 
4645 	r2.offset = 100;
4646 	r2.length = 10;
4647 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4648 
4649 	r2.offset = 110;
4650 	r2.length = 20;
4651 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4652 
4653 	r2.offset = 140;
4654 	r2.length = 150;
4655 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4656 
4657 	r2.offset = 130;
4658 	r2.length = 200;
4659 	CU_ASSERT(bdev_lba_range_overlapped(&r1, &r2));
4660 
4661 	r2.offset = 150;
4662 	r2.length = 100;
4663 	CU_ASSERT(!bdev_lba_range_overlapped(&r1, &r2));
4664 
4665 	r2.offset = 110;
4666 	r2.length = 0;
4667 	CU_ASSERT(!bdev_lba_range_overlapped(&r1, &r2));
4668 }
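
/*
 * The cases above pin down the overlap predicate: treating each range as the
 * half-open interval [offset, offset + length), two ranges overlap iff both
 * are non-empty and r1.offset < r2.offset + r2.length &&
 * r2.offset < r1.offset + r1.length.  Zero-length ranges never overlap.
 */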
4669 
4670 static bool g_lock_lba_range_done;
4671 static bool g_unlock_lba_range_done;
4672 
4673 static void
4674 lock_lba_range_done(void *ctx, int status)
4675 {
4676 	g_lock_lba_range_done = true;
4677 }
4678 
4679 static void
4680 unlock_lba_range_done(void *ctx, int status)
4681 {
4682 	g_unlock_lba_range_done = true;
4683 }
4684 
4685 static void
4686 lock_lba_range_check_ranges(void)
4687 {
4688 	struct spdk_bdev *bdev;
4689 	struct spdk_bdev_desc *desc = NULL;
4690 	struct spdk_io_channel *io_ch;
4691 	struct spdk_bdev_channel *channel;
4692 	struct lba_range *range;
4693 	int ctx1;
4694 	int rc;
4695 
4696 	ut_init_bdev(NULL);
4697 	bdev = allocate_bdev("bdev0");
4698 
4699 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
4700 	CU_ASSERT(rc == 0);
4701 	CU_ASSERT(desc != NULL);
4702 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4703 	io_ch = spdk_bdev_get_io_channel(desc);
4704 	CU_ASSERT(io_ch != NULL);
4705 	channel = spdk_io_channel_get_ctx(io_ch);
4706 
4707 	g_lock_lba_range_done = false;
4708 	rc = bdev_lock_lba_range(desc, io_ch, 20, 10, lock_lba_range_done, &ctx1);
4709 	CU_ASSERT(rc == 0);
4710 	poll_threads();
4711 
4712 	CU_ASSERT(g_lock_lba_range_done == true);
4713 	range = TAILQ_FIRST(&channel->locked_ranges);
4714 	SPDK_CU_ASSERT_FATAL(range != NULL);
4715 	CU_ASSERT(range->offset == 20);
4716 	CU_ASSERT(range->length == 10);
4717 	CU_ASSERT(range->owner_ch == channel);
4718 
4719 	/* Unlocks must exactly match a lock. */
4720 	g_unlock_lba_range_done = false;
4721 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 1, unlock_lba_range_done, &ctx1);
4722 	CU_ASSERT(rc == -EINVAL);
4723 	CU_ASSERT(g_unlock_lba_range_done == false);
4724 
4725 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 10, unlock_lba_range_done, &ctx1);
4726 	CU_ASSERT(rc == 0);
4727 	spdk_delay_us(100);
4728 	poll_threads();
4729 
4730 	CU_ASSERT(g_unlock_lba_range_done == true);
4731 	CU_ASSERT(TAILQ_EMPTY(&channel->locked_ranges));
4732 
4733 	spdk_put_io_channel(io_ch);
4734 	spdk_bdev_close(desc);
4735 	free_bdev(bdev);
4736 	ut_fini_bdev();
4737 }
4738 
4739 static void
4740 lock_lba_range_with_io_outstanding(void)
4741 {
4742 	struct spdk_bdev *bdev;
4743 	struct spdk_bdev_desc *desc = NULL;
4744 	struct spdk_io_channel *io_ch;
4745 	struct spdk_bdev_channel *channel;
4746 	struct lba_range *range;
4747 	char buf[4096];
4748 	int ctx1;
4749 	int rc;
4750 
4751 	ut_init_bdev(NULL);
4752 	bdev = allocate_bdev("bdev0");
4753 
4754 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
4755 	CU_ASSERT(rc == 0);
4756 	CU_ASSERT(desc != NULL);
4757 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4758 	io_ch = spdk_bdev_get_io_channel(desc);
4759 	CU_ASSERT(io_ch != NULL);
4760 	channel = spdk_io_channel_get_ctx(io_ch);
4761 
4762 	g_io_done = false;
4763 	rc = spdk_bdev_read_blocks(desc, io_ch, buf, 20, 1, io_done, &ctx1);
4764 	CU_ASSERT(rc == 0);
4765 
4766 	g_lock_lba_range_done = false;
4767 	rc = bdev_lock_lba_range(desc, io_ch, 20, 10, lock_lba_range_done, &ctx1);
4768 	CU_ASSERT(rc == 0);
4769 	poll_threads();
4770 
4771 	/* The lock should immediately become valid, since there are no outstanding
4772 	 * write I/O.
4773 	 */
4774 	CU_ASSERT(g_io_done == false);
4775 	CU_ASSERT(g_lock_lba_range_done == true);
4776 	range = TAILQ_FIRST(&channel->locked_ranges);
4777 	SPDK_CU_ASSERT_FATAL(range != NULL);
4778 	CU_ASSERT(range->offset == 20);
4779 	CU_ASSERT(range->length == 10);
4780 	CU_ASSERT(range->owner_ch == channel);
4781 	CU_ASSERT(range->locked_ctx == &ctx1);
4782 
4783 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 10, lock_lba_range_done, &ctx1);
4784 	CU_ASSERT(rc == 0);
4785 	stub_complete_io(1);
4786 	spdk_delay_us(100);
4787 	poll_threads();
4788 
4789 	CU_ASSERT(TAILQ_EMPTY(&channel->locked_ranges));
4790 
4791 	/* Now try again, but with a write I/O. */
4792 	g_io_done = false;
4793 	rc = spdk_bdev_write_blocks(desc, io_ch, buf, 20, 1, io_done, &ctx1);
4794 	CU_ASSERT(rc == 0);
4795 
4796 	g_lock_lba_range_done = false;
4797 	rc = bdev_lock_lba_range(desc, io_ch, 20, 10, lock_lba_range_done, &ctx1);
4798 	CU_ASSERT(rc == 0);
4799 	poll_threads();
4800 
	/* The lock should not be fully valid yet, since a write I/O is outstanding.
	 * But note that the range should be on the channel's locked_ranges list, to
	 * make sure no new write I/O is started.
	 */
4805 	CU_ASSERT(g_io_done == false);
4806 	CU_ASSERT(g_lock_lba_range_done == false);
4807 	range = TAILQ_FIRST(&channel->locked_ranges);
4808 	SPDK_CU_ASSERT_FATAL(range != NULL);
4809 	CU_ASSERT(range->offset == 20);
4810 	CU_ASSERT(range->length == 10);
4811 
4812 	/* Complete the write I/O.  This should make the lock valid (checked by confirming
4813 	 * our callback was invoked).
4814 	 */
4815 	stub_complete_io(1);
4816 	spdk_delay_us(100);
4817 	poll_threads();
4818 	CU_ASSERT(g_io_done == true);
4819 	CU_ASSERT(g_lock_lba_range_done == true);
4820 
4821 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 10, unlock_lba_range_done, &ctx1);
4822 	CU_ASSERT(rc == 0);
4823 	poll_threads();
4824 
4825 	CU_ASSERT(TAILQ_EMPTY(&channel->locked_ranges));
4826 
4827 	spdk_put_io_channel(io_ch);
4828 	spdk_bdev_close(desc);
4829 	free_bdev(bdev);
4830 	ut_fini_bdev();
4831 }
4832 
4833 static void
4834 lock_lba_range_overlapped(void)
4835 {
4836 	struct spdk_bdev *bdev;
4837 	struct spdk_bdev_desc *desc = NULL;
4838 	struct spdk_io_channel *io_ch;
4839 	struct spdk_bdev_channel *channel;
4840 	struct lba_range *range;
4841 	int ctx1;
4842 	int rc;
4843 
4844 	ut_init_bdev(NULL);
4845 	bdev = allocate_bdev("bdev0");
4846 
4847 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
4848 	CU_ASSERT(rc == 0);
4849 	CU_ASSERT(desc != NULL);
4850 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
4851 	io_ch = spdk_bdev_get_io_channel(desc);
4852 	CU_ASSERT(io_ch != NULL);
4853 	channel = spdk_io_channel_get_ctx(io_ch);
4854 
4855 	/* Lock range 20-29. */
4856 	g_lock_lba_range_done = false;
4857 	rc = bdev_lock_lba_range(desc, io_ch, 20, 10, lock_lba_range_done, &ctx1);
4858 	CU_ASSERT(rc == 0);
4859 	poll_threads();
4860 
4861 	CU_ASSERT(g_lock_lba_range_done == true);
4862 	range = TAILQ_FIRST(&channel->locked_ranges);
4863 	SPDK_CU_ASSERT_FATAL(range != NULL);
4864 	CU_ASSERT(range->offset == 20);
4865 	CU_ASSERT(range->length == 10);
4866 
4867 	/* Try to lock range 25-39.  It should not lock immediately, since it overlaps with
4868 	 * 20-29.
4869 	 */
4870 	g_lock_lba_range_done = false;
4871 	rc = bdev_lock_lba_range(desc, io_ch, 25, 15, lock_lba_range_done, &ctx1);
4872 	CU_ASSERT(rc == 0);
4873 	poll_threads();
4874 
4875 	CU_ASSERT(g_lock_lba_range_done == false);
4876 	range = TAILQ_FIRST(&bdev->internal.pending_locked_ranges);
4877 	SPDK_CU_ASSERT_FATAL(range != NULL);
4878 	CU_ASSERT(range->offset == 25);
4879 	CU_ASSERT(range->length == 15);
4880 
4881 	/* Unlock 20-29.  This should result in range 25-39 now getting locked since it
4882 	 * no longer overlaps with an active lock.
4883 	 */
4884 	g_unlock_lba_range_done = false;
4885 	rc = bdev_unlock_lba_range(desc, io_ch, 20, 10, unlock_lba_range_done, &ctx1);
4886 	CU_ASSERT(rc == 0);
4887 	poll_threads();
4888 
4889 	CU_ASSERT(g_unlock_lba_range_done == true);
4890 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.pending_locked_ranges));
4891 	range = TAILQ_FIRST(&channel->locked_ranges);
4892 	SPDK_CU_ASSERT_FATAL(range != NULL);
4893 	CU_ASSERT(range->offset == 25);
4894 	CU_ASSERT(range->length == 15);
4895 
4896 	/* Lock 40-59.  This should immediately lock since it does not overlap with the
4897 	 * currently active 25-39 lock.
4898 	 */
4899 	g_lock_lba_range_done = false;
4900 	rc = bdev_lock_lba_range(desc, io_ch, 40, 20, lock_lba_range_done, &ctx1);
4901 	CU_ASSERT(rc == 0);
4902 	poll_threads();
4903 
4904 	CU_ASSERT(g_lock_lba_range_done == true);
4905 	range = TAILQ_FIRST(&bdev->internal.locked_ranges);
4906 	SPDK_CU_ASSERT_FATAL(range != NULL);
4907 	range = TAILQ_NEXT(range, tailq);
4908 	SPDK_CU_ASSERT_FATAL(range != NULL);
4909 	CU_ASSERT(range->offset == 40);
4910 	CU_ASSERT(range->length == 20);
4911 
4912 	/* Try to lock 35-44.  Note that this overlaps with both 25-39 and 40-59. */
4913 	g_lock_lba_range_done = false;
4914 	rc = bdev_lock_lba_range(desc, io_ch, 35, 10, lock_lba_range_done, &ctx1);
4915 	CU_ASSERT(rc == 0);
4916 	poll_threads();
4917 
4918 	CU_ASSERT(g_lock_lba_range_done == false);
4919 	range = TAILQ_FIRST(&bdev->internal.pending_locked_ranges);
4920 	SPDK_CU_ASSERT_FATAL(range != NULL);
4921 	CU_ASSERT(range->offset == 35);
4922 	CU_ASSERT(range->length == 10);
4923 
4924 	/* Unlock 25-39.  Make sure that 35-44 is still in the pending list, since
4925 	 * the 40-59 lock is still active.
4926 	 */
4927 	g_unlock_lba_range_done = false;
4928 	rc = bdev_unlock_lba_range(desc, io_ch, 25, 15, unlock_lba_range_done, &ctx1);
4929 	CU_ASSERT(rc == 0);
4930 	poll_threads();
4931 
4932 	CU_ASSERT(g_unlock_lba_range_done == true);
4933 	CU_ASSERT(g_lock_lba_range_done == false);
4934 	range = TAILQ_FIRST(&bdev->internal.pending_locked_ranges);
4935 	SPDK_CU_ASSERT_FATAL(range != NULL);
4936 	CU_ASSERT(range->offset == 35);
4937 	CU_ASSERT(range->length == 10);
4938 
4939 	/* Unlock 40-59.  This should result in 35-44 now getting locked, since there are
4940 	 * no longer any active overlapping locks.
4941 	 */
4942 	g_unlock_lba_range_done = false;
4943 	rc = bdev_unlock_lba_range(desc, io_ch, 40, 20, unlock_lba_range_done, &ctx1);
4944 	CU_ASSERT(rc == 0);
4945 	poll_threads();
4946 
4947 	CU_ASSERT(g_unlock_lba_range_done == true);
4948 	CU_ASSERT(g_lock_lba_range_done == true);
4949 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.pending_locked_ranges));
4950 	range = TAILQ_FIRST(&bdev->internal.locked_ranges);
4951 	SPDK_CU_ASSERT_FATAL(range != NULL);
4952 	CU_ASSERT(range->offset == 35);
4953 	CU_ASSERT(range->length == 10);
4954 
4955 	/* Finally, unlock 35-44. */
4956 	g_unlock_lba_range_done = false;
4957 	rc = bdev_unlock_lba_range(desc, io_ch, 35, 10, unlock_lba_range_done, &ctx1);
4958 	CU_ASSERT(rc == 0);
4959 	poll_threads();
4960 
4961 	CU_ASSERT(g_unlock_lba_range_done == true);
4962 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.locked_ranges));
4963 
4964 	spdk_put_io_channel(io_ch);
4965 	spdk_bdev_close(desc);
4966 	free_bdev(bdev);
4967 	ut_fini_bdev();
4968 }
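
/*
 * Summary of the promotion behavior exercised above: a lock request that
 * overlaps an active range parks on bdev->internal.pending_locked_ranges and
 * is re-evaluated on every unlock; it is promoted to locked_ranges only once
 * no active range overlaps it any longer.
 */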
4969 
4970 static void
4971 abort_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
4972 {
4973 	g_abort_done = true;
4974 	g_abort_status = bdev_io->internal.status;
4975 	spdk_bdev_free_io(bdev_io);
4976 }
4977 
4978 static void
4979 bdev_io_abort(void)
4980 {
4981 	struct spdk_bdev *bdev;
4982 	struct spdk_bdev_desc *desc = NULL;
4983 	struct spdk_io_channel *io_ch;
4984 	struct spdk_bdev_channel *channel;
4985 	struct spdk_bdev_mgmt_channel *mgmt_ch;
4986 	struct spdk_bdev_opts bdev_opts = {};
4987 	struct iovec iov[SPDK_BDEV_IO_NUM_CHILD_IOV * 2];
4988 	uint64_t io_ctx1 = 0, io_ctx2 = 0, i;
4989 	int rc;
4990 
4991 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
4992 	bdev_opts.bdev_io_pool_size = 7;
4993 	bdev_opts.bdev_io_cache_size = 2;
4994 	ut_init_bdev(&bdev_opts);
4995 
4996 	bdev = allocate_bdev("bdev0");
4997 
4998 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
4999 	CU_ASSERT(rc == 0);
5000 	CU_ASSERT(desc != NULL);
5001 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5002 	io_ch = spdk_bdev_get_io_channel(desc);
5003 	CU_ASSERT(io_ch != NULL);
5004 	channel = spdk_io_channel_get_ctx(io_ch);
5005 	mgmt_ch = channel->shared_resource->mgmt_ch;
5006 
5007 	g_abort_done = false;
5008 
5009 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_ABORT, false);
5010 
5011 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5012 	CU_ASSERT(rc == -ENOTSUP);
5013 
5014 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_ABORT, true);
5015 
5016 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx2, abort_done, NULL);
5017 	CU_ASSERT(rc == 0);
5018 	CU_ASSERT(g_abort_done == true);
5019 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_FAILED);
5020 
5021 	/* Test the case that the target I/O was successfully aborted. */
5022 	g_io_done = false;
5023 
5024 	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, &io_ctx1);
5025 	CU_ASSERT(rc == 0);
5026 	CU_ASSERT(g_io_done == false);
5027 
5028 	g_abort_done = false;
5029 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5030 
5031 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5032 	CU_ASSERT(rc == 0);
5033 	CU_ASSERT(g_io_done == true);
5034 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
5035 	stub_complete_io(1);
5036 	CU_ASSERT(g_abort_done == true);
5037 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5038 
	/* Test the case that the target I/O was not aborted because it completed
	 * while the abort was executing.
	 */
5042 	g_io_done = false;
5043 
5044 	rc = spdk_bdev_read_blocks(desc, io_ch, NULL, 0, 1, io_done, &io_ctx1);
5045 	CU_ASSERT(rc == 0);
5046 	CU_ASSERT(g_io_done == false);
5047 
5048 	g_abort_done = false;
5049 	g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED;
5050 
5051 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5052 	CU_ASSERT(rc == 0);
5053 	CU_ASSERT(g_io_done == false);
5054 
5055 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5056 	stub_complete_io(1);
5057 	CU_ASSERT(g_io_done == true);
5058 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5059 
5060 	g_io_exp_status = SPDK_BDEV_IO_STATUS_FAILED;
5061 	stub_complete_io(1);
5062 	CU_ASSERT(g_abort_done == true);
5063 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5064 
5065 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5066 
5067 	bdev->optimal_io_boundary = 16;
5068 	bdev->split_on_optimal_io_boundary = true;
5069 
5070 	/* Test that a single-vector command which is split is aborted correctly.
5071 	 * Offset 14, length 8, payload 0xF000
5072 	 *  Child - Offset 14, length 2, payload 0xF000
5073 	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
5074 	 */
5075 	g_io_done = false;
5076 
5077 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 8, io_done, &io_ctx1);
5078 	CU_ASSERT(rc == 0);
5079 	CU_ASSERT(g_io_done == false);
5080 
5081 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5082 
5083 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5084 
5085 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5086 	CU_ASSERT(rc == 0);
5087 	CU_ASSERT(g_io_done == true);
5088 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
5089 	stub_complete_io(2);
5090 	CU_ASSERT(g_abort_done == true);
5091 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5092 
	/* Test that a multi-vector command that needs to be split is aborted
	 * correctly. The abort is requested before the second child I/O is
	 * submitted. The parent I/O should complete with failure without
	 * submitting the second child I/O.
	 */
5098 	for (i = 0; i < SPDK_BDEV_IO_NUM_CHILD_IOV * 2; i++) {
5099 		iov[i].iov_base = (void *)((i + 1) * 0x10000);
5100 		iov[i].iov_len = 512;
5101 	}
5102 
5103 	bdev->optimal_io_boundary = SPDK_BDEV_IO_NUM_CHILD_IOV;
5104 	g_io_done = false;
5105 	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, SPDK_BDEV_IO_NUM_CHILD_IOV * 2, 0,
5106 				    SPDK_BDEV_IO_NUM_CHILD_IOV * 2, io_done, &io_ctx1);
5107 	CU_ASSERT(rc == 0);
5108 	CU_ASSERT(g_io_done == false);
5109 
5110 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5111 
5112 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5113 
5114 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5115 	CU_ASSERT(rc == 0);
5116 	CU_ASSERT(g_io_done == true);
5117 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
5118 	stub_complete_io(1);
5119 	CU_ASSERT(g_abort_done == true);
5120 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5121 
5122 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5123 
5124 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5125 
5126 	bdev->optimal_io_boundary = 16;
5127 	g_io_done = false;
5128 
	/* Test that a single-vector command which is split is aborted correctly.
	 * Unlike the cases above, the child abort requests are submitted
	 * sequentially due to the limited capacity of the spdk_bdev_io pool.
	 */
5133 	rc = spdk_bdev_read_blocks(desc, io_ch, (void *)0xF000, 14, 50, io_done, &io_ctx1);
5134 	CU_ASSERT(rc == 0);
5135 	CU_ASSERT(g_io_done == false);
5136 
5137 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
5138 
5139 	g_abort_done = false;
5140 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5141 
5142 	rc = spdk_bdev_abort(desc, io_ch, &io_ctx1, abort_done, NULL);
5143 	CU_ASSERT(rc == 0);
5144 	CU_ASSERT(!TAILQ_EMPTY(&mgmt_ch->io_wait_queue));
5145 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
5146 
5147 	stub_complete_io(1);
5148 	CU_ASSERT(g_io_done == true);
5149 	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_FAILED);
5150 	stub_complete_io(3);
5151 	CU_ASSERT(g_abort_done == true);
5152 	CU_ASSERT(g_abort_status == SPDK_BDEV_IO_STATUS_SUCCESS);
5153 
5154 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5155 
5156 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5157 
5158 	spdk_put_io_channel(io_ch);
5159 	spdk_bdev_close(desc);
5160 	free_bdev(bdev);
5161 	ut_fini_bdev();
5162 }
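
/*
 * Illustrative sketch (unused): spdk_bdev_abort() targets I/O by the cb_arg
 * pointer the submitter passed, which is why the tests above key every
 * read/write and its abort off the same io_ctx variable.
 */
static void __attribute__((unused))
example_abort_by_ctx(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		     void *io_ctx)
{
	/* Aborts all outstanding I/O submitted on ch with cb_arg == io_ctx. */
	spdk_bdev_abort(desc, ch, io_ctx, abort_done, NULL);
}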
5163 
5164 static void
5165 bdev_unmap(void)
5166 {
5167 	struct spdk_bdev *bdev;
5168 	struct spdk_bdev_desc *desc = NULL;
5169 	struct spdk_io_channel *ioch;
5170 	struct spdk_bdev_channel *bdev_ch;
5171 	struct ut_expected_io *expected_io;
5172 	struct spdk_bdev_opts bdev_opts = {};
5173 	uint32_t i, num_outstanding;
5174 	uint64_t offset, num_blocks, max_unmap_blocks, num_children;
5175 	int rc;
5176 
5177 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
5178 	bdev_opts.bdev_io_pool_size = 512;
5179 	bdev_opts.bdev_io_cache_size = 64;
5180 	ut_init_bdev(&bdev_opts);
5181 
5182 	bdev = allocate_bdev("bdev");
5183 
5184 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
5185 	CU_ASSERT_EQUAL(rc, 0);
5186 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5187 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5188 	ioch = spdk_bdev_get_io_channel(desc);
5189 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
5190 	bdev_ch = spdk_io_channel_get_ctx(ioch);
5191 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
5192 
5193 	fn_table.submit_request = stub_submit_request;
5194 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5195 
	/* Case 1: First test that the request won't be split */
5197 	num_blocks = 32;
5198 
5199 	g_io_done = false;
5200 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, 0, num_blocks, 0);
5201 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5202 	rc = spdk_bdev_unmap_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5203 	CU_ASSERT_EQUAL(rc, 0);
5204 	CU_ASSERT(g_io_done == false);
5205 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5206 	stub_complete_io(1);
5207 	CU_ASSERT(g_io_done == true);
5208 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5209 
	/* Case 2: Test the split with two child requests */
5211 	bdev->max_unmap = 8;
5212 	bdev->max_unmap_segments = 2;
5213 	max_unmap_blocks = bdev->max_unmap * bdev->max_unmap_segments;
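	/* Each child unmap covers max_unmap * max_unmap_segments = 16 blocks here,
	 * so the 32-block unmap below splits into exactly two children. */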
5214 	num_blocks = max_unmap_blocks * 2;
5215 	offset = 0;
5216 
5217 	g_io_done = false;
5218 	for (i = 0; i < 2; i++) {
5219 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, offset, max_unmap_blocks, 0);
5220 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5221 		offset += max_unmap_blocks;
5222 	}
5223 
5224 	rc = spdk_bdev_unmap_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5225 	CU_ASSERT_EQUAL(rc, 0);
5226 	CU_ASSERT(g_io_done == false);
5227 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5228 	stub_complete_io(2);
5229 	CU_ASSERT(g_io_done == true);
5230 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5231 
	/* Case 3: Test the split with 15 child requests; only 8 are outstanding at a time, so those finish first */
5233 	num_children = 15;
5234 	num_blocks = max_unmap_blocks * num_children;
5235 	g_io_done = false;
5236 	offset = 0;
5237 	for (i = 0; i < num_children; i++) {
5238 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_UNMAP, offset, max_unmap_blocks, 0);
5239 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5240 		offset += max_unmap_blocks;
5241 	}
5242 
5243 	rc = spdk_bdev_unmap_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5244 	CU_ASSERT_EQUAL(rc, 0);
5245 	CU_ASSERT(g_io_done == false);
5246 
5247 	while (num_children > 0) {
5248 		num_outstanding = spdk_min(num_children, SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS);
5249 		CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_outstanding);
5250 		stub_complete_io(num_outstanding);
5251 		num_children -= num_outstanding;
5252 	}
5253 	CU_ASSERT(g_io_done == true);
5254 
5255 	spdk_put_io_channel(ioch);
5256 	spdk_bdev_close(desc);
5257 	free_bdev(bdev);
5258 	ut_fini_bdev();
5259 }
5260 
5261 static void
5262 bdev_write_zeroes_split_test(void)
5263 {
5264 	struct spdk_bdev *bdev;
5265 	struct spdk_bdev_desc *desc = NULL;
5266 	struct spdk_io_channel *ioch;
5267 	struct spdk_bdev_channel *bdev_ch;
5268 	struct ut_expected_io *expected_io;
5269 	struct spdk_bdev_opts bdev_opts = {};
5270 	uint32_t i, num_outstanding;
5271 	uint64_t offset, num_blocks, max_write_zeroes_blocks, num_children;
5272 	int rc;
5273 
5274 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
5275 	bdev_opts.bdev_io_pool_size = 512;
5276 	bdev_opts.bdev_io_cache_size = 64;
5277 	ut_init_bdev(&bdev_opts);
5278 
5279 	bdev = allocate_bdev("bdev");
5280 
5281 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
5282 	CU_ASSERT_EQUAL(rc, 0);
5283 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5284 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5285 	ioch = spdk_bdev_get_io_channel(desc);
5286 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
5287 	bdev_ch = spdk_io_channel_get_ctx(ioch);
5288 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
5289 
5290 	fn_table.submit_request = stub_submit_request;
5291 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
5292 
	/* Case 1: First test that the request won't be split */
5294 	num_blocks = 32;
5295 
5296 	g_io_done = false;
5297 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, 0, num_blocks, 0);
5298 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5299 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5300 	CU_ASSERT_EQUAL(rc, 0);
5301 	CU_ASSERT(g_io_done == false);
5302 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5303 	stub_complete_io(1);
5304 	CU_ASSERT(g_io_done == true);
5305 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5306 
	/* Case 2: Test the split with two child requests */
5308 	max_write_zeroes_blocks = 8;
5309 	bdev->max_write_zeroes = max_write_zeroes_blocks;
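	/* With max_write_zeroes = 8, the 16-block request below splits into two
	 * children of 8 blocks each. */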
5310 	num_blocks = max_write_zeroes_blocks * 2;
5311 	offset = 0;
5312 
5313 	g_io_done = false;
5314 	for (i = 0; i < 2; i++) {
5315 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, offset, max_write_zeroes_blocks,
5316 						   0);
5317 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5318 		offset += max_write_zeroes_blocks;
5319 	}
5320 
5321 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5322 	CU_ASSERT_EQUAL(rc, 0);
5323 	CU_ASSERT(g_io_done == false);
5324 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5325 	stub_complete_io(2);
5326 	CU_ASSERT(g_io_done == true);
5327 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5328 
	/* Case 3: Test the split with 15 child requests; only 8 are outstanding at a time, so those finish first */
5330 	num_children = 15;
5331 	num_blocks = max_write_zeroes_blocks * num_children;
5332 	g_io_done = false;
5333 	offset = 0;
5334 	for (i = 0; i < num_children; i++) {
5335 		expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, offset, max_write_zeroes_blocks,
5336 						   0);
5337 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5338 		offset += max_write_zeroes_blocks;
5339 	}
5340 
5341 	rc = spdk_bdev_write_zeroes_blocks(desc, ioch, 0, num_blocks, io_done, NULL);
5342 	CU_ASSERT_EQUAL(rc, 0);
5343 	CU_ASSERT(g_io_done == false);
5344 
5345 	while (num_children > 0) {
5346 		num_outstanding = spdk_min(num_children, SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS);
5347 		CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_outstanding);
5348 		stub_complete_io(num_outstanding);
5349 		num_children -= num_outstanding;
5350 	}
5351 	CU_ASSERT(g_io_done == true);
5352 
5353 	spdk_put_io_channel(ioch);
5354 	spdk_bdev_close(desc);
5355 	free_bdev(bdev);
5356 	ut_fini_bdev();
5357 }
5358 
5359 static void
5360 bdev_set_options_test(void)
5361 {
5362 	struct spdk_bdev_opts bdev_opts = {};
5363 	int rc;
5364 
	/* Case 1: Do not set opts_size */
5366 	rc = spdk_bdev_set_opts(&bdev_opts);
5367 	CU_ASSERT(rc == -1);
5368 
5369 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
5370 	bdev_opts.bdev_io_pool_size = 4;
5371 	bdev_opts.bdev_io_cache_size = 2;
5372 	bdev_opts.small_buf_pool_size = 4;
5373 
5374 	/* Case 2: Do not set valid small_buf_pool_size and large_buf_pool_size */
5375 	rc = spdk_bdev_set_opts(&bdev_opts);
5376 	CU_ASSERT(rc == -1);
5377 
5378 	/* Case 3: Do not set valid large_buf_pool_size */
5379 	bdev_opts.small_buf_pool_size = BUF_SMALL_POOL_SIZE;
5380 	bdev_opts.large_buf_pool_size = BUF_LARGE_POOL_SIZE - 1;
5381 	rc = spdk_bdev_set_opts(&bdev_opts);
5382 	CU_ASSERT(rc == -1);
5383 
	/* Case 4: Set a valid large_buf_pool_size */
5385 	bdev_opts.large_buf_pool_size = BUF_LARGE_POOL_SIZE;
5386 	rc = spdk_bdev_set_opts(&bdev_opts);
5387 	CU_ASSERT(rc == 0);
5388 
	/* Case 5: Set different valid values for the small and large buf pools */
	bdev_opts.small_buf_pool_size = BUF_SMALL_POOL_SIZE + 3;
	bdev_opts.large_buf_pool_size = BUF_LARGE_POOL_SIZE + 3;
5392 	rc = spdk_bdev_set_opts(&bdev_opts);
5393 	CU_ASSERT(rc == 0);
5394 }
5395 
5396 static uint64_t
5397 get_ns_time(void)
5398 {
5399 	int rc;
5400 	struct timespec ts;
5401 
5402 	rc = clock_gettime(CLOCK_MONOTONIC, &ts);
5403 	CU_ASSERT(rc == 0);
	return (uint64_t)ts.tv_sec * 1000 * 1000 * 1000 + ts.tv_nsec;
5405 }
5406 
5407 static int
5408 rb_tree_get_height(struct spdk_bdev_name *bdev_name)
5409 {
5410 	int h1, h2;
5411 
5412 	if (bdev_name == NULL) {
5413 		return -1;
5414 	} else {
5415 		h1 = rb_tree_get_height(RB_LEFT(bdev_name, node));
5416 		h2 = rb_tree_get_height(RB_RIGHT(bdev_name, node));
5417 
5418 		return spdk_max(h1, h2) + 1;
5419 	}
5420 }
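
/*
 * rb_tree_get_height() walks the name tree recursively; a NULL child counts
 * as height -1, so a single node has height 0.  bdev_multi_allocation() uses
 * it to assert that the red-black tree of registered names stays balanced,
 * i.e. its height grows only logarithmically with the number of bdevs.
 */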
5421 
5422 static void
5423 bdev_multi_allocation(void)
5424 {
5425 	const int max_bdev_num = 1024 * 16;
5426 	char name[max_bdev_num][16];
5427 	char noexist_name[] = "invalid_bdev";
5428 	struct spdk_bdev *bdev[max_bdev_num];
5429 	int i, j;
5430 	uint64_t last_time;
5431 	int bdev_num;
5432 	int height;
5433 
5434 	for (j = 0; j < max_bdev_num; j++) {
5435 		snprintf(name[j], sizeof(name[j]), "bdev%d", j);
5436 	}
5437 
5438 	for (i = 0; i < 16; i++) {
5439 		last_time = get_ns_time();
5440 		bdev_num = 1024 * (i + 1);
5441 		for (j = 0; j < bdev_num; j++) {
5442 			bdev[j] = allocate_bdev(name[j]);
5443 			height = rb_tree_get_height(&bdev[j]->internal.bdev_name);
5444 			CU_ASSERT(height <= (int)(spdk_u32log2(2 * j + 2)));
5445 		}
5446 		SPDK_NOTICELOG("alloc bdev num %d takes %" PRIu64 " ms\n", bdev_num,
5447 			       (get_ns_time() - last_time) / 1000 / 1000);
5448 		for (j = 0; j < bdev_num; j++) {
5449 			CU_ASSERT(spdk_bdev_get_by_name(name[j]) != NULL);
5450 		}
5451 		CU_ASSERT(spdk_bdev_get_by_name(noexist_name) == NULL);
5452 
5453 		for (j = 0; j < bdev_num; j++) {
5454 			free_bdev(bdev[j]);
5455 		}
5456 		for (j = 0; j < bdev_num; j++) {
5457 			CU_ASSERT(spdk_bdev_get_by_name(name[j]) == NULL);
5458 		}
5459 	}
5460 }
5461 
5462 static struct spdk_memory_domain *g_bdev_memory_domain = (struct spdk_memory_domain *) 0xf00df00d;
5463 
5464 static int
5465 test_bdev_get_supported_dma_device_types_op(void *ctx, struct spdk_memory_domain **domains,
5466 		int array_size)
5467 {
5468 	if (array_size > 0 && domains) {
5469 		domains[0] = g_bdev_memory_domain;
5470 	}
5471 
5472 	return 1;
5473 }
5474 
5475 static void
5476 bdev_get_memory_domains(void)
5477 {
5478 	struct spdk_bdev_fn_table fn_table = {
5479 		.get_memory_domains = test_bdev_get_supported_dma_device_types_op
5480 	};
5481 	struct spdk_bdev bdev = { .fn_table = &fn_table };
5482 	struct spdk_memory_domain *domains[2] = {};
5483 	int rc;
5484 
5485 	/* bdev is NULL */
5486 	rc = spdk_bdev_get_memory_domains(NULL, domains, 2);
5487 	CU_ASSERT(rc == -EINVAL);
5488 
5489 	/* domains is NULL */
5490 	rc = spdk_bdev_get_memory_domains(&bdev, NULL, 2);
5491 	CU_ASSERT(rc == 1);
5492 
5493 	/* array size is 0 */
5494 	rc = spdk_bdev_get_memory_domains(&bdev, domains, 0);
5495 	CU_ASSERT(rc == 1);
5496 
5497 	/* get_supported_dma_device_types op is set */
5498 	rc = spdk_bdev_get_memory_domains(&bdev, domains, 2);
5499 	CU_ASSERT(rc == 1);
5500 	CU_ASSERT(domains[0] == g_bdev_memory_domain);
5501 
5502 	/* get_supported_dma_device_types op is not set */
5503 	fn_table.get_memory_domains = NULL;
5504 	rc = spdk_bdev_get_memory_domains(&bdev, domains, 2);
5505 	CU_ASSERT(rc == 0);
5506 }
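
/*
 * Illustrative sketch (unused): a caller that does not know the number of
 * domains up front can query the count first (NULL array), then fetch them.
 */
static void __attribute__((unused))
example_query_memory_domains(struct spdk_bdev *bdev)
{
	struct spdk_memory_domain *domains[4] = {};
	int num;

	/* With a NULL array only the number of supported domains is returned. */
	num = spdk_bdev_get_memory_domains(bdev, NULL, 0);
	if (num > 0 && num <= 4) {
		spdk_bdev_get_memory_domains(bdev, domains, num);
	}
}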
5507 
5508 static void
5509 _bdev_io_ext(struct spdk_bdev_ext_io_opts *ext_io_opts)
5510 {
5511 	struct spdk_bdev *bdev;
5512 	struct spdk_bdev_desc *desc = NULL;
5513 	struct spdk_io_channel *io_ch;
5514 	char io_buf[512];
5515 	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
5516 	struct ut_expected_io *expected_io;
5517 	int rc;
5518 
5519 	ut_init_bdev(NULL);
5520 
5521 	bdev = allocate_bdev("bdev0");
5522 	bdev->md_interleave = false;
5523 	bdev->md_len = 8;
5524 
5525 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5526 	CU_ASSERT(rc == 0);
5527 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5528 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5529 	io_ch = spdk_bdev_get_io_channel(desc);
5530 	CU_ASSERT(io_ch != NULL);
5531 
5532 	/* read */
5533 	g_io_done = false;
5534 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 32, 14, 1);
5535 	if (ext_io_opts) {
5536 		expected_io->md_buf = ext_io_opts->metadata;
5537 	}
5538 	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
5539 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5540 
5541 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, ext_io_opts);
5542 
5543 	CU_ASSERT(rc == 0);
5544 	CU_ASSERT(g_io_done == false);
5545 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5546 	stub_complete_io(1);
5547 	CU_ASSERT(g_io_done == true);
5548 
5549 	/* write */
5550 	g_io_done = false;
5551 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
5552 	if (ext_io_opts) {
5553 		expected_io->md_buf = ext_io_opts->metadata;
5554 	}
5555 	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
5556 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5557 
5558 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, ext_io_opts);
5559 
5560 	CU_ASSERT(rc == 0);
5561 	CU_ASSERT(g_io_done == false);
5562 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5563 	stub_complete_io(1);
5564 	CU_ASSERT(g_io_done == true);
5565 
5566 	spdk_put_io_channel(io_ch);
5567 	spdk_bdev_close(desc);
5568 	free_bdev(bdev);
5569 	ut_fini_bdev();
5571 }
5572 
5573 static void
5574 bdev_io_ext(void)
5575 {
5576 	struct spdk_bdev_ext_io_opts ext_io_opts = {
5577 		.metadata = (void *)0xFF000000,
5578 		.size = sizeof(ext_io_opts)
5579 	};
5580 
5581 	_bdev_io_ext(&ext_io_opts);
5582 }
5583 
5584 static void
5585 bdev_io_ext_no_opts(void)
5586 {
5587 	_bdev_io_ext(NULL);
5588 }
5589 
5590 static void
5591 bdev_io_ext_invalid_opts(void)
5592 {
5593 	struct spdk_bdev *bdev;
5594 	struct spdk_bdev_desc *desc = NULL;
5595 	struct spdk_io_channel *io_ch;
5596 	char io_buf[512];
5597 	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
5598 	struct spdk_bdev_ext_io_opts ext_io_opts = {
5599 		.metadata = (void *)0xFF000000,
5600 		.size = sizeof(ext_io_opts)
5601 	};
5602 	int rc;
5603 
5604 	ut_init_bdev(NULL);
5605 
5606 	bdev = allocate_bdev("bdev0");
5607 	bdev->md_interleave = false;
5608 	bdev->md_len = 8;
5609 
5610 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5611 	CU_ASSERT(rc == 0);
5612 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5613 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5614 	io_ch = spdk_bdev_get_io_channel(desc);
5615 	CU_ASSERT(io_ch != NULL);
5616 
5617 	/* Test invalid ext_opts size */
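	/* A size of zero, a size larger than the current struct, and a size too
	 * small to cover the metadata member must all be rejected with -EINVAL.
	 */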
5618 	ext_io_opts.size = 0;
5619 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5620 	CU_ASSERT(rc == -EINVAL);
5621 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5622 	CU_ASSERT(rc == -EINVAL);
5623 
5624 	ext_io_opts.size = sizeof(ext_io_opts) * 2;
5625 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5626 	CU_ASSERT(rc == -EINVAL);
5627 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5628 	CU_ASSERT(rc == -EINVAL);
5629 
5630 	ext_io_opts.size = offsetof(struct spdk_bdev_ext_io_opts, metadata) +
5631 			   sizeof(ext_io_opts.metadata) - 1;
5632 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5633 	CU_ASSERT(rc == -EINVAL);
5634 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5635 	CU_ASSERT(rc == -EINVAL);
5636 
5637 	spdk_put_io_channel(io_ch);
5638 	spdk_bdev_close(desc);
5639 	free_bdev(bdev);
5640 	ut_fini_bdev();
5641 }
5642 
5643 static void
5644 bdev_io_ext_split(void)
5645 {
5646 	struct spdk_bdev *bdev;
5647 	struct spdk_bdev_desc *desc = NULL;
5648 	struct spdk_io_channel *io_ch;
5649 	char io_buf[512];
5650 	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
5651 	struct ut_expected_io *expected_io;
5652 	struct spdk_bdev_ext_io_opts ext_io_opts = {
5653 		.metadata = (void *)0xFF000000,
5654 		.size = sizeof(ext_io_opts)
5655 	};
5656 	int rc;
5657 
5658 	ut_init_bdev(NULL);
5659 
5660 	bdev = allocate_bdev("bdev0");
5661 	bdev->md_interleave = false;
5662 	bdev->md_len = 8;
5663 
5664 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5665 	CU_ASSERT(rc == 0);
5666 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5667 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5668 	io_ch = spdk_bdev_get_io_channel(desc);
5669 	CU_ASSERT(io_ch != NULL);
5670 
5671 	/* Check that IO request with ext_opts and metadata is split correctly
5672 	 * Offset 14, length 8, payload 0xF000
5673 	 *  Child - Offset 14, length 2, payload 0xF000
5674 	 *  Child - Offset 16, length 6, payload 0xF000 + 2 * 512
5675 	 */
5676 	bdev->optimal_io_boundary = 16;
5677 	bdev->split_on_optimal_io_boundary = true;
5678 	bdev->md_interleave = false;
5679 	bdev->md_len = 8;
5680 
5681 	iov.iov_base = (void *)0xF000;
5682 	iov.iov_len = 4096;
5683 	memset(&ext_io_opts, 0, sizeof(ext_io_opts));
5684 	ext_io_opts.metadata = (void *)0xFF000000;
5685 	ext_io_opts.size = sizeof(ext_io_opts);
5686 	g_io_done = false;
5687 
5688 	/* read */
5689 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 14, 2, 1);
5690 	expected_io->md_buf = ext_io_opts.metadata;
5691 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
5692 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5693 
5694 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 16, 6, 1);
5695 	expected_io->md_buf = ext_io_opts.metadata + 2 * 8;
5696 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
5697 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5698 
5699 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
5700 	CU_ASSERT(rc == 0);
5701 	CU_ASSERT(g_io_done == false);
5702 
5703 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5704 	stub_complete_io(2);
5705 	CU_ASSERT(g_io_done == true);
5706 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5707 
5708 	/* write */
5709 	g_io_done = false;
5710 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 14, 2, 1);
5711 	expected_io->md_buf = ext_io_opts.metadata;
5712 	ut_expected_io_set_iov(expected_io, 0, (void *)0xF000, 2 * 512);
5713 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5714 
5715 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 16, 6, 1);
5716 	expected_io->md_buf = ext_io_opts.metadata + 2 * 8;
5717 	ut_expected_io_set_iov(expected_io, 0, (void *)(0xF000 + 2 * 512), 6 * 512);
5718 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5719 
5720 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 14, 8, io_done, NULL, &ext_io_opts);
5721 	CU_ASSERT(rc == 0);
5722 	CU_ASSERT(g_io_done == false);
5723 
5724 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 2);
5725 	stub_complete_io(2);
5726 	CU_ASSERT(g_io_done == true);
5727 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5728 
5729 	spdk_put_io_channel(io_ch);
5730 	spdk_bdev_close(desc);
5731 	free_bdev(bdev);
5732 	ut_fini_bdev();
5733 }
5734 
5735 static void
5736 bdev_io_ext_bounce_buffer(void)
5737 {
5738 	struct spdk_bdev *bdev;
5739 	struct spdk_bdev_desc *desc = NULL;
5740 	struct spdk_io_channel *io_ch;
5741 	char io_buf[512];
5742 	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
5743 	struct ut_expected_io *expected_io;
5744 	struct spdk_bdev_ext_io_opts ext_io_opts = {
5745 		.metadata = (void *)0xFF000000,
5746 		.size = sizeof(ext_io_opts)
5747 	};
5748 	int rc;
5749 
5750 	ut_init_bdev(NULL);
5751 
5752 	bdev = allocate_bdev("bdev0");
5753 	bdev->md_interleave = false;
5754 	bdev->md_len = 8;
5755 
5756 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5757 	CU_ASSERT(rc == 0);
5758 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5759 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5760 	io_ch = spdk_bdev_get_io_channel(desc);
5761 	CU_ASSERT(io_ch != NULL);
5762 
	/* Verify data pull/push.
	 * The bdev doesn't support memory domains, so bounce buffers from the bdev
	 * memory pool will be used. */
5765 	ext_io_opts.memory_domain = (struct spdk_memory_domain *)0xdeadbeef;
5766 
5767 	/* read */
5768 	g_io_done = false;
5769 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 32, 14, 1);
5770 	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
5771 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5772 
5773 	rc = spdk_bdev_readv_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5774 
5775 	CU_ASSERT(rc == 0);
5776 	CU_ASSERT(g_io_done == false);
5777 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5778 	stub_complete_io(1);
5779 	CU_ASSERT(g_memory_domain_push_data_called == true);
5780 	CU_ASSERT(g_io_done == true);
5781 
5782 	/* write */
5783 	g_io_done = false;
5784 	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
5785 	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
5786 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
5787 
5788 	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
5789 
5790 	CU_ASSERT(rc == 0);
5791 	CU_ASSERT(g_memory_domain_pull_data_called == true);
5792 	CU_ASSERT(g_io_done == false);
5793 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
5794 	stub_complete_io(1);
5795 	CU_ASSERT(g_io_done == true);
5796 
5797 	spdk_put_io_channel(io_ch);
5798 	spdk_bdev_close(desc);
5799 	free_bdev(bdev);
5800 	ut_fini_bdev();
5801 }
5802 
5803 static void
5804 bdev_register_uuid_alias(void)
5805 {
5806 	struct spdk_bdev *bdev, *second;
5807 	char uuid[SPDK_UUID_STRING_LEN];
5808 	int rc;
5809 
5810 	ut_init_bdev(NULL);
5811 	bdev = allocate_bdev("bdev0");
5812 
	/* Make sure a UUID was generated */
5814 	CU_ASSERT_FALSE(spdk_mem_all_zero(&bdev->uuid, sizeof(bdev->uuid)));
5815 
	/* Check that a UUID alias was registered */
5817 	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &bdev->uuid);
5818 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), bdev);
5819 
5820 	/* Unregister the bdev */
5821 	spdk_bdev_unregister(bdev, NULL, NULL);
5822 	poll_threads();
5823 	CU_ASSERT_PTR_NULL(spdk_bdev_get_by_name(uuid));
5824 
	/* Check the same, but this time register the bdev with a non-zero UUID */
5826 	rc = spdk_bdev_register(bdev);
5827 	CU_ASSERT_EQUAL(rc, 0);
5828 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), bdev);
5829 
5830 	/* Unregister the bdev */
5831 	spdk_bdev_unregister(bdev, NULL, NULL);
5832 	poll_threads();
5833 	CU_ASSERT_PTR_NULL(spdk_bdev_get_by_name(uuid));
5834 
	/* Register the bdev using the UUID as its name */
5836 	bdev->name = uuid;
5837 	rc = spdk_bdev_register(bdev);
5838 	CU_ASSERT_EQUAL(rc, 0);
5839 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), bdev);
5840 
5841 	/* Unregister the bdev */
5842 	spdk_bdev_unregister(bdev, NULL, NULL);
5843 	poll_threads();
5844 	CU_ASSERT_PTR_NULL(spdk_bdev_get_by_name(uuid));
5845 
	/* Check that it's not possible to register two bdevs with the same UUID */
5847 	bdev->name = "bdev0";
5848 	second = allocate_bdev("bdev1");
5849 	spdk_uuid_copy(&bdev->uuid, &second->uuid);
5850 	rc = spdk_bdev_register(bdev);
5851 	CU_ASSERT_EQUAL(rc, -EEXIST);
5852 
5853 	/* Regenerate the UUID and re-check */
5854 	spdk_uuid_generate(&bdev->uuid);
5855 	rc = spdk_bdev_register(bdev);
5856 	CU_ASSERT_EQUAL(rc, 0);
5857 
5858 	/* And check that both bdevs can be retrieved through their UUIDs */
5859 	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &bdev->uuid);
5860 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), bdev);
5861 	spdk_uuid_fmt_lower(uuid, sizeof(uuid), &second->uuid);
5862 	CU_ASSERT_EQUAL(spdk_bdev_get_by_name(uuid), second);
5863 
5864 	free_bdev(second);
5865 	free_bdev(bdev);
5866 	ut_fini_bdev();
5867 }
5868 
5869 static void
5870 bdev_unregister_by_name(void)
5871 {
5872 	struct spdk_bdev *bdev;
5873 	int rc;
5874 
5875 	bdev = allocate_bdev("bdev");
5876 
5877 	g_event_type1 = 0xFF;
5878 	g_unregister_arg = NULL;
5879 	g_unregister_rc = -1;
5880 
5881 	rc = spdk_bdev_unregister_by_name("bdev1", &bdev_ut_if, bdev_unregister_cb, (void *)0x12345678);
5882 	CU_ASSERT(rc == -ENODEV);
5883 
5884 	rc = spdk_bdev_unregister_by_name("bdev", &vbdev_ut_if, bdev_unregister_cb, (void *)0x12345678);
5885 	CU_ASSERT(rc == -ENODEV);
5886 
5887 	rc = spdk_bdev_unregister_by_name("bdev", &bdev_ut_if, bdev_unregister_cb, (void *)0x12345678);
5888 	CU_ASSERT(rc == 0);
5889 
	/* Check that the unregister callback is delayed */
5891 	CU_ASSERT(g_unregister_arg == NULL);
5892 	CU_ASSERT(g_unregister_rc == -1);
5893 
5894 	poll_threads();
5895 
	/* The event callback shall not be issued because the device was closed */
5897 	CU_ASSERT(g_event_type1 == 0xFF);
5898 	/* Unregister callback is issued */
5899 	CU_ASSERT(g_unregister_arg == (void *)0x12345678);
5900 	CU_ASSERT(g_unregister_rc == 0);
5901 
5902 	free_bdev(bdev);
5903 }
5904 
5905 static int
5906 count_bdevs(void *ctx, struct spdk_bdev *bdev)
5907 {
5908 	int *count = ctx;
5909 
5910 	(*count)++;
5911 
5912 	return 0;
5913 }
5914 
5915 static void
5916 for_each_bdev_test(void)
5917 {
5918 	struct spdk_bdev *bdev[8];
5919 	int rc, count;
5920 
5921 	bdev[0] = allocate_bdev("bdev0");
5922 	bdev[0]->internal.status = SPDK_BDEV_STATUS_REMOVING;
5923 
5924 	bdev[1] = allocate_bdev("bdev1");
5925 	rc = spdk_bdev_module_claim_bdev(bdev[1], NULL, &bdev_ut_if);
5926 	CU_ASSERT(rc == 0);
5927 
5928 	bdev[2] = allocate_bdev("bdev2");
5929 
5930 	bdev[3] = allocate_bdev("bdev3");
5931 	rc = spdk_bdev_module_claim_bdev(bdev[3], NULL, &bdev_ut_if);
5932 	CU_ASSERT(rc == 0);
5933 
5934 	bdev[4] = allocate_bdev("bdev4");
5935 
5936 	bdev[5] = allocate_bdev("bdev5");
5937 	rc = spdk_bdev_module_claim_bdev(bdev[5], NULL, &bdev_ut_if);
5938 	CU_ASSERT(rc == 0);
5939 
5940 	bdev[6] = allocate_bdev("bdev6");
5941 
5942 	bdev[7] = allocate_bdev("bdev7");
5943 
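	/* spdk_for_each_bdev() skips bdev0 (status REMOVING), visiting 7 of the 8.
	 * spdk_for_each_bdev_leaf() additionally skips the claimed bdev1, bdev3,
	 * and bdev5, visiting only the 4 unclaimed ones.
	 */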
5944 	count = 0;
5945 	rc = spdk_for_each_bdev(&count, count_bdevs);
5946 	CU_ASSERT(rc == 0);
5947 	CU_ASSERT(count == 7);
5948 
5949 	count = 0;
5950 	rc = spdk_for_each_bdev_leaf(&count, count_bdevs);
5951 	CU_ASSERT(rc == 0);
5952 	CU_ASSERT(count == 4);
5953 
5954 	bdev[0]->internal.status = SPDK_BDEV_STATUS_READY;
5955 	free_bdev(bdev[0]);
5956 	free_bdev(bdev[1]);
5957 	free_bdev(bdev[2]);
5958 	free_bdev(bdev[3]);
5959 	free_bdev(bdev[4]);
5960 	free_bdev(bdev[5]);
5961 	free_bdev(bdev[6]);
5962 	free_bdev(bdev[7]);
5963 }
5964 
5965 static void
5966 bdev_seek_test(void)
5967 {
5968 	struct spdk_bdev *bdev;
5969 	struct spdk_bdev_desc *desc = NULL;
5970 	struct spdk_io_channel *io_ch;
5971 	int rc;
5972 
5973 	ut_init_bdev(NULL);
5974 	poll_threads();
5975 
5976 	bdev = allocate_bdev("bdev0");
5977 
5978 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
5979 	CU_ASSERT(rc == 0);
5980 	poll_threads();
5981 	SPDK_CU_ASSERT_FATAL(desc != NULL);
5982 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
5983 	io_ch = spdk_bdev_get_io_channel(desc);
5984 	CU_ASSERT(io_ch != NULL);
5985 
5986 	/* Seek data not supported */
5987 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_SEEK_DATA, false);
5988 	rc = spdk_bdev_seek_data(desc, io_ch, 0, bdev_seek_cb, NULL);
5989 	CU_ASSERT(rc == 0);
5990 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5991 	poll_threads();
5992 	CU_ASSERT(g_seek_offset == 0);
5993 
5994 	/* Seek hole not supported */
5995 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_SEEK_HOLE, false);
5996 	rc = spdk_bdev_seek_hole(desc, io_ch, 0, bdev_seek_cb, NULL);
5997 	CU_ASSERT(rc == 0);
5998 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
5999 	poll_threads();
6000 	CU_ASSERT(g_seek_offset == UINT64_MAX);
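	/* When the backend supports neither SEEK_DATA nor SEEK_HOLE, the bdev layer
	 * treats the whole device as data: seek_data returns the requested offset
	 * and seek_hole returns UINT64_MAX. */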
6001 
6002 	/* Seek data supported */
6003 	g_seek_data_offset = 12345;
6004 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_SEEK_DATA, true);
6005 	rc = spdk_bdev_seek_data(desc, io_ch, 0, bdev_seek_cb, NULL);
6006 	CU_ASSERT(rc == 0);
6007 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
6008 	stub_complete_io(1);
6009 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6010 	CU_ASSERT(g_seek_offset == 12345);
6011 
6012 	/* Seek hole supported */
6013 	g_seek_hole_offset = 67890;
6014 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_SEEK_HOLE, true);
6015 	rc = spdk_bdev_seek_hole(desc, io_ch, 0, bdev_seek_cb, NULL);
6016 	CU_ASSERT(rc == 0);
6017 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
6018 	stub_complete_io(1);
6019 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6020 	CU_ASSERT(g_seek_offset == 67890);
6021 
6022 	spdk_put_io_channel(io_ch);
6023 	spdk_bdev_close(desc);
6024 	free_bdev(bdev);
6025 	ut_fini_bdev();
6026 }
6027 
6028 static void
6029 bdev_copy(void)
6030 {
6031 	struct spdk_bdev *bdev;
6032 	struct spdk_bdev_desc *desc = NULL;
6033 	struct spdk_io_channel *ioch;
6034 	struct ut_expected_io *expected_io;
6035 	uint64_t src_offset, num_blocks;
6036 	uint32_t num_completed;
6037 	int rc;
6038 
6039 	ut_init_bdev(NULL);
6040 	bdev = allocate_bdev("bdev");
6041 
6042 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
6043 	CU_ASSERT_EQUAL(rc, 0);
6044 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6045 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
6046 	ioch = spdk_bdev_get_io_channel(desc);
6047 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
6048 
6049 	fn_table.submit_request = stub_submit_request;
6050 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
6051 
6052 	/* First test that if the bdev supports copy, the request won't be split */
6053 	bdev->md_len = 0;
6054 	bdev->blocklen = 4096;
6055 	num_blocks = 512;
6056 	src_offset = bdev->blockcnt - num_blocks;
6057 
6058 	expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, 0, src_offset, num_blocks);
6059 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6060 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6061 	CU_ASSERT_EQUAL(rc, 0);
6062 	num_completed = stub_complete_io(1);
6063 	CU_ASSERT_EQUAL(num_completed, 1);
6064 
6065 	/* Check that if copy is not supported it'll fail */
6066 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_COPY, false);
6067 
6068 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6069 	CU_ASSERT_EQUAL(rc, -ENOTSUP);
6070 
6071 	ut_enable_io_type(SPDK_BDEV_IO_TYPE_COPY, true);
6072 	spdk_put_io_channel(ioch);
6073 	spdk_bdev_close(desc);
6074 	free_bdev(bdev);
6075 	ut_fini_bdev();
6076 }
6077 
6078 static void
6079 bdev_copy_split_test(void)
6080 {
6081 	struct spdk_bdev *bdev;
6082 	struct spdk_bdev_desc *desc = NULL;
6083 	struct spdk_io_channel *ioch;
6084 	struct spdk_bdev_channel *bdev_ch;
6085 	struct ut_expected_io *expected_io;
6086 	struct spdk_bdev_opts bdev_opts = {};
6087 	uint32_t i, num_outstanding;
6088 	uint64_t offset, src_offset, num_blocks, max_copy_blocks, num_children;
6089 	int rc;
6090 
6091 	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
6092 	bdev_opts.bdev_io_pool_size = 512;
6093 	bdev_opts.bdev_io_cache_size = 64;
6094 	rc = spdk_bdev_set_opts(&bdev_opts);
6095 	CU_ASSERT(rc == 0);
6096 
6097 	ut_init_bdev(NULL);
6098 	bdev = allocate_bdev("bdev");
6099 
6100 	rc = spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc);
6101 	CU_ASSERT_EQUAL(rc, 0);
6102 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6103 	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));
6104 	ioch = spdk_bdev_get_io_channel(desc);
6105 	SPDK_CU_ASSERT_FATAL(ioch != NULL);
6106 	bdev_ch = spdk_io_channel_get_ctx(ioch);
6107 	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));
6108 
6109 	fn_table.submit_request = stub_submit_request;
6110 	g_io_exp_status = SPDK_BDEV_IO_STATUS_SUCCESS;
6111 
	/* Case 1: First test that the request won't be split */
6113 	num_blocks = 32;
6114 	src_offset = bdev->blockcnt - num_blocks;
6115 
6116 	g_io_done = false;
6117 	expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, 0, src_offset, num_blocks);
6118 	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6119 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6120 	CU_ASSERT_EQUAL(rc, 0);
6121 	CU_ASSERT(g_io_done == false);
6122 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
6123 	stub_complete_io(1);
6124 	CU_ASSERT(g_io_done == true);
6125 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6126 
6127 	/* Case 2: Test the split with 2 child requests */
6128 	max_copy_blocks = 8;
6129 	bdev->max_copy = max_copy_blocks;
6130 	num_children = 2;
6131 	num_blocks = max_copy_blocks * num_children;
6132 	offset = 0;
6133 	src_offset = bdev->blockcnt - num_blocks;
6134 
6135 	g_io_done = false;
6136 	for (i = 0; i < num_children; i++) {
6137 		expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, offset,
6138 							src_offset + offset, max_copy_blocks);
6139 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6140 		offset += max_copy_blocks;
6141 	}
6142 
6143 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6144 	CU_ASSERT_EQUAL(rc, 0);
6145 	CU_ASSERT(g_io_done == false);
6146 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_children);
6147 	stub_complete_io(num_children);
6148 	CU_ASSERT(g_io_done == true);
6149 	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
6150 
6151 	/* Case 3: Split into 15 child requests; at most SPDK_BDEV_MAX_CHILDREN_COPY_REQS (8) outstanding at once */
6152 	num_children = 15;
6153 	num_blocks = max_copy_blocks * num_children;
6154 	offset = 0;
6155 	src_offset = bdev->blockcnt - num_blocks;
6156 
6157 	g_io_done = false;
6158 	for (i = 0; i < num_children; i++) {
6159 		expected_io = ut_alloc_expected_copy_io(SPDK_BDEV_IO_TYPE_COPY, offset,
6160 							src_offset + offset, max_copy_blocks);
6161 		TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);
6162 		offset += max_copy_blocks;
6163 	}
6164 
6165 	rc = spdk_bdev_copy_blocks(desc, ioch, 0, src_offset, num_blocks, io_done, NULL);
6166 	CU_ASSERT_EQUAL(rc, 0);
6167 	CU_ASSERT(g_io_done == false);
6168 
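	/* Complete the children in batches; each completed batch lets the next set of child copies be submitted */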
6169 	while (num_children > 0) {
6170 		num_outstanding = spdk_min(num_children, SPDK_BDEV_MAX_CHILDREN_COPY_REQS);
6171 		CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == num_outstanding);
6172 		stub_complete_io(num_outstanding);
6173 		num_children -= num_outstanding;
6174 	}
6175 	CU_ASSERT(g_io_done == true);
6176 
6177 	spdk_put_io_channel(ioch);
6178 	spdk_bdev_close(desc);
6179 	free_bdev(bdev);
6180 	ut_fini_bdev();
6181 }
6182 
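/* examine_config callback that takes a v1 (exclusive write) claim for vbdev_ut_if */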
6183 static void
6184 examine_claim_v1(struct spdk_bdev *bdev)
6185 {
6186 	int rc;
6187 
6188 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &vbdev_ut_if);
6189 	CU_ASSERT(rc == 0);
6190 }
6191 
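/* Examine callback asserting that neither the bdev manager's nor the bdev's spinlock is held */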
6192 static void
6193 examine_no_lock_held(struct spdk_bdev *bdev)
6194 {
6195 	CU_ASSERT(!spdk_spin_held(&g_bdev_mgr.spinlock));
6196 	CU_ASSERT(!spdk_spin_held(&bdev->internal.spinlock));
6197 }
6198 
6199 struct examine_claim_v2_ctx {
6200 	struct ut_examine_ctx examine_ctx;
6201 	enum spdk_bdev_claim_type claim_type;
6202 	struct spdk_bdev_desc *desc;
6203 };
6204 
6205 static void
6206 examine_claim_v2(struct spdk_bdev *bdev)
6207 {
6208 	struct examine_claim_v2_ctx *ctx = bdev->ctxt;
6209 	int rc;
6210 
6211 	rc = spdk_bdev_open_ext(bdev->name, false, bdev_ut_event_cb, NULL, &ctx->desc);
6212 	CU_ASSERT(rc == 0);
6213 
6214 	rc = spdk_bdev_module_claim_bdev_desc(ctx->desc, ctx->claim_type, NULL, &vbdev_ut_if);
6215 	CU_ASSERT(rc == 0);
6216 }
6217 
6218 static void
6219 examine_locks(void)
6220 {
6221 	struct spdk_bdev *bdev;
6222 	struct ut_examine_ctx ctx = { 0 };
6223 	struct examine_claim_v2_ctx v2_ctx;
6224 
6225 	/* Without any claims, one code path is taken */
6226 	ctx.examine_config = examine_no_lock_held;
6227 	ctx.examine_disk = examine_no_lock_held;
6228 	bdev = allocate_bdev_ctx("bdev0", &ctx);
6229 	CU_ASSERT(ctx.examine_config_count == 1);
6230 	CU_ASSERT(ctx.examine_disk_count == 1);
6231 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6232 	CU_ASSERT(bdev->internal.claim.v1.module == NULL);
6233 	free_bdev(bdev);
6234 
6235 	/* Exercise another path that is taken when examine_config() takes a v1 claim. */
6236 	memset(&ctx, 0, sizeof(ctx));
6237 	ctx.examine_config = examine_claim_v1;
6238 	ctx.examine_disk = examine_no_lock_held;
6239 	bdev = allocate_bdev_ctx("bdev0", &ctx);
6240 	CU_ASSERT(ctx.examine_config_count == 1);
6241 	CU_ASSERT(ctx.examine_disk_count == 1);
6242 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
6243 	CU_ASSERT(bdev->internal.claim.v1.module == &vbdev_ut_if);
6244 	spdk_bdev_module_release_bdev(bdev);
6245 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6246 	CU_ASSERT(bdev->internal.claim.v1.module == NULL);
6247 	free_bdev(bdev);
6248 
6249 	/* Exercise the final path that comes with v2 claims. */
6250 	memset(&v2_ctx, 0, sizeof(v2_ctx));
6251 	v2_ctx.examine_ctx.examine_config = examine_claim_v2;
6252 	v2_ctx.examine_ctx.examine_disk = examine_no_lock_held;
6253 	v2_ctx.claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
6254 	bdev = allocate_bdev_ctx("bdev0", &v2_ctx);
6255 	CU_ASSERT(v2_ctx.examine_ctx.examine_config_count == 1);
6256 	CU_ASSERT(v2_ctx.examine_ctx.examine_disk_count == 1);
6257 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
6258 	spdk_bdev_close(v2_ctx.desc);
6259 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6260 	free_bdev(bdev);
6261 }
6262 
6263 #define UT_ASSERT_CLAIM_V2_COUNT(bdev, expect) \
6264 	do { \
6265 		uint32_t len = 0; \
6266 		struct spdk_bdev_module_claim *claim; \
6267 		TAILQ_FOREACH(claim, &bdev->internal.claim.v2.claims, link) { \
6268 			len++; \
6269 		} \
6270 		CU_ASSERT(len == expect); \
6271 	} while (0)
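
/*
 * Claim-type shorthand in the claim_v2_* test names below:
 * rwo = SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE (one writer),
 * rom = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE (read-only, many readers),
 * rwm = SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED (many writers sharing a key).
 */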
6272 
6273 static void
6274 claim_v2_rwo(void)
6275 {
6276 	struct spdk_bdev *bdev;
6277 	struct spdk_bdev_desc *desc;
6278 	struct spdk_bdev_desc *desc2;
6279 	struct spdk_bdev_claim_opts opts;
6280 	int rc;
6281 
6282 	bdev = allocate_bdev("bdev0");
6283 
6284 	/* Claim without options */
6285 	desc = NULL;
6286 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6287 	CU_ASSERT(rc == 0);
6288 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6289 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6290 					      &bdev_ut_if);
6291 	CU_ASSERT(rc == 0);
6292 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
6293 	CU_ASSERT(desc->claim != NULL);
6294 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6295 	CU_ASSERT(strcmp(desc->claim->name, "") == 0);
6296 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6297 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6298 
6299 	/* Release the claim by closing the descriptor */
6300 	spdk_bdev_close(desc);
6301 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6302 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6303 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6304 
6305 	/* Claim with options */
6306 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6307 	snprintf(opts.name, sizeof(opts.name), "%s", "claim with options");
6308 	desc = NULL;
6309 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6310 	CU_ASSERT(rc == 0);
6311 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6312 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, &opts,
6313 					      &bdev_ut_if);
6314 	CU_ASSERT(rc == 0);
6315 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
6316 	CU_ASSERT(desc->claim != NULL);
6317 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6318 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
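	/* The claim must have copied the name: clobbering the caller's opts must not change it */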
6319 	memset(&opts, 0, sizeof(opts));
6320 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6321 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6322 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6323 
6324 	/* The claim blocks new writers. */
6325 	desc2 = NULL;
6326 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc2);
6327 	CU_ASSERT(rc == -EPERM);
6328 	CU_ASSERT(desc2 == NULL);
6329 
6330 	/* New readers are allowed */
6331 	desc2 = NULL;
6332 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc2);
6333 	CU_ASSERT(rc == 0);
6334 	CU_ASSERT(desc2 != NULL);
6335 	CU_ASSERT(!desc2->write);
6336 
6337 	/* No new v2 RWO claims are allowed */
6338 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6339 					      &bdev_ut_if);
6340 	CU_ASSERT(rc == -EPERM);
6341 
6342 	/* No new v2 ROM claims are allowed */
6343 	CU_ASSERT(!desc2->write);
6344 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6345 					      &bdev_ut_if);
6346 	CU_ASSERT(rc == -EPERM);
6347 	CU_ASSERT(!desc2->write);
6348 
6349 	/* No new v2 RWM claims are allowed */
6350 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6351 	opts.shared_claim_key = (uint64_t)&opts;
6352 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6353 					      &bdev_ut_if);
6354 	CU_ASSERT(rc == -EPERM);
6355 	CU_ASSERT(!desc2->write);
6356 
6357 	/* No new v1 claims are allowed */
6358 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6359 	CU_ASSERT(rc == -EPERM);
6360 
6361 	/* None of the above changed the existing claim */
6362 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6363 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6364 
6365 	/* Closing the first descriptor releases the claim; desc2 can then take a new RWO claim and is promoted to read-write. */
6366 	spdk_bdev_close(desc);
6367 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6368 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6369 	CU_ASSERT(!desc2->write);
6370 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6371 					      &bdev_ut_if);
6372 	CU_ASSERT(rc == 0);
6373 	CU_ASSERT(desc2->claim != NULL);
6374 	CU_ASSERT(desc2->write);
6375 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
6376 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc2->claim);
6377 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6378 	spdk_bdev_close(desc2);
6379 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6380 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6381 
6382 	/* Cannot claim with a key */
6383 	desc = NULL;
6384 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6385 	CU_ASSERT(rc == 0);
6386 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6387 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6388 	opts.shared_claim_key = (uint64_t)&opts;
6389 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, &opts,
6390 					      &bdev_ut_if);
6391 	CU_ASSERT(rc == -EINVAL);
6392 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6393 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6394 	spdk_bdev_close(desc);
6395 
6396 	/* Clean up */
6397 	free_bdev(bdev);
6398 }
6399 
6400 static void
6401 claim_v2_rom(void)
6402 {
6403 	struct spdk_bdev *bdev;
6404 	struct spdk_bdev_desc *desc;
6405 	struct spdk_bdev_desc *desc2;
6406 	struct spdk_bdev_claim_opts opts;
6407 	int rc;
6408 
6409 	bdev = allocate_bdev("bdev0");
6410 
6411 	/* Claim without options */
6412 	desc = NULL;
6413 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6414 	CU_ASSERT(rc == 0);
6415 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6416 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6417 					      &bdev_ut_if);
6418 	CU_ASSERT(rc == 0);
6419 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
6420 	CU_ASSERT(desc->claim != NULL);
6421 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6422 	CU_ASSERT(strcmp(desc->claim->name, "") == 0);
6423 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6424 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6425 
6426 	/* Release the claim by closing the descriptor */
6427 	spdk_bdev_close(desc);
6428 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6429 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6431 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6432 
6433 	/* Claim with options */
6434 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6435 	snprintf(opts.name, sizeof(opts.name), "%s", "claim with options");
6436 	desc = NULL;
6437 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6438 	CU_ASSERT(rc == 0);
6439 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6440 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, &opts,
6441 					      &bdev_ut_if);
6442 	CU_ASSERT(rc == 0);
6443 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
6444 	SPDK_CU_ASSERT_FATAL(desc->claim != NULL);
6445 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6446 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6447 	memset(&opts, 0, sizeof(opts));
6448 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6449 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6450 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6451 
6452 	/* The claim blocks new writers. */
6453 	desc2 = NULL;
6454 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc2);
6455 	CU_ASSERT(rc == -EPERM);
6456 	CU_ASSERT(desc2 == NULL);
6457 
6458 	/* New readers are allowed */
6459 	desc2 = NULL;
6460 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc2);
6461 	CU_ASSERT(rc == 0);
6462 	CU_ASSERT(desc2 != NULL);
6463 	CU_ASSERT(!desc2->write);
6464 
6465 	/* No new v2 RWO claims are allowed */
6466 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6467 					      &bdev_ut_if);
6468 	CU_ASSERT(rc == -EPERM);
6469 
6470 	/* No new v2 RWM claims are allowed */
6471 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6472 	opts.shared_claim_key = (uint64_t)&opts;
6473 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6474 					      &bdev_ut_if);
6475 	CU_ASSERT(rc == -EPERM);
6476 	CU_ASSERT(!desc2->write);
6477 
6478 	/* No new v1 claims are allowed */
6479 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6480 	CU_ASSERT(rc == -EPERM);
6481 
6482 	/* None of the above messed up the existing claim */
6483 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6484 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6485 
6486 	/* New v2 ROM claims are allowed and the descriptor stays read-only. */
6487 	CU_ASSERT(!desc2->write);
6488 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6489 					      &bdev_ut_if);
6490 	CU_ASSERT(rc == 0);
6491 	CU_ASSERT(!desc2->write);
6492 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6493 	CU_ASSERT(TAILQ_NEXT(desc->claim, link) == desc2->claim);
6494 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 2);
6495 
6496 	/* Claim remains when closing the first descriptor */
6497 	spdk_bdev_close(desc);
6498 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
6499 	CU_ASSERT(!TAILQ_EMPTY(&bdev->internal.open_descs));
6500 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc2->claim);
6501 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6502 
6503 	/* Claim removed when closing the other descriptor */
6504 	spdk_bdev_close(desc2);
6505 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6506 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6507 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6508 
6509 	/* Cannot claim with a key */
6510 	desc = NULL;
6511 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6512 	CU_ASSERT(rc == 0);
6513 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6514 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6515 	opts.shared_claim_key = (uint64_t)&opts;
6516 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, &opts,
6517 					      &bdev_ut_if);
6518 	CU_ASSERT(rc == -EINVAL);
6519 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6520 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6521 	spdk_bdev_close(desc);
6522 
6523 	/* Cannot claim with a read-write descriptor */
6524 	desc = NULL;
6525 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6526 	CU_ASSERT(rc == 0);
6527 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6528 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6529 					      &bdev_ut_if);
6530 	CU_ASSERT(rc == -EINVAL);
6531 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6532 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6533 	spdk_bdev_close(desc);
6534 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6535 
6536 	/* Clean up */
6537 	free_bdev(bdev);
6538 }
6539 
6540 static void
6541 claim_v2_rwm(void)
6542 {
6543 	struct spdk_bdev *bdev;
6544 	struct spdk_bdev_desc *desc;
6545 	struct spdk_bdev_desc *desc2;
6546 	struct spdk_bdev_claim_opts opts;
6547 	char good_key, bad_key;
6548 	int rc;
6549 
6550 	bdev = allocate_bdev("bdev0");
6551 
6552 	/* Claim without options should fail */
6553 	desc = NULL;
6554 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6555 	CU_ASSERT(rc == 0);
6556 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6557 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, NULL,
6558 					      &bdev_ut_if);
6559 	CU_ASSERT(rc == -EINVAL);
6560 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6561 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 0);
6562 	CU_ASSERT(desc->claim == NULL);
6563 
6564 	/* Claim with options */
6565 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6566 	snprintf(opts.name, sizeof(opts.name), "%s", "claim with options");
6567 	opts.shared_claim_key = (uint64_t)&good_key;
6568 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6569 					      &bdev_ut_if);
6570 	CU_ASSERT(rc == 0);
6571 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED);
6572 	SPDK_CU_ASSERT_FATAL(desc->claim != NULL);
6573 	CU_ASSERT(desc->claim->module == &bdev_ut_if);
6574 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6575 	memset(&opts, 0, sizeof(opts));
6576 	CU_ASSERT(strcmp(desc->claim->name, "claim with options") == 0);
6577 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6578 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6579 
6580 	/* The claim blocks new writers. */
6581 	desc2 = NULL;
6582 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc2);
6583 	CU_ASSERT(rc == -EPERM);
6584 	CU_ASSERT(desc2 == NULL);
6585 
6586 	/* New readers are allowed */
6587 	desc2 = NULL;
6588 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc2);
6589 	CU_ASSERT(rc == 0);
6590 	CU_ASSERT(desc2 != NULL);
6591 	CU_ASSERT(!desc2->write);
6592 
6593 	/* No new v2 RWO claims are allowed */
6594 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE, NULL,
6595 					      &bdev_ut_if);
6596 	CU_ASSERT(rc == -EPERM);
6597 
6598 	/* No new v2 ROM claims are allowed and the descriptor stays read-only. */
6599 	CU_ASSERT(!desc2->write);
6600 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE, NULL,
6601 					      &bdev_ut_if);
6602 	CU_ASSERT(rc == -EPERM);
6603 	CU_ASSERT(!desc2->write);
6604 
6605 	/* No new v1 claims are allowed */
6606 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6607 	CU_ASSERT(rc == -EPERM);
6608 
6609 	/* No new v2 RWM claims are allowed if the key does not match */
6610 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6611 	opts.shared_claim_key = (uint64_t)&bad_key;
6612 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6613 					      &bdev_ut_if);
6614 	CU_ASSERT(rc == -EPERM);
6615 	CU_ASSERT(!desc2->write);
6616 
6617 	/* None of the above messed up the existing claim */
6618 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc->claim);
6619 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6620 
6621 	/* New v2 RWM claims are allowed and the descriptor is promoted if the key matches. */
6622 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6623 	opts.shared_claim_key = (uint64_t)&good_key;
6624 	CU_ASSERT(!desc2->write);
6625 	rc = spdk_bdev_module_claim_bdev_desc(desc2, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6626 					      &bdev_ut_if);
6627 	CU_ASSERT(rc == 0);
6628 	CU_ASSERT(desc2->write);
6629 	CU_ASSERT(TAILQ_NEXT(desc->claim, link) == desc2->claim);
6630 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 2);
6631 
6632 	/* Claim remains when closing the first descriptor */
6633 	spdk_bdev_close(desc);
6634 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED);
6635 	CU_ASSERT(!TAILQ_EMPTY(&bdev->internal.open_descs));
6636 	CU_ASSERT(TAILQ_FIRST(&bdev->internal.claim.v2.claims) == desc2->claim);
6637 	UT_ASSERT_CLAIM_V2_COUNT(bdev, 1);
6638 
6639 	/* Claim removed when closing the other descriptor */
6640 	spdk_bdev_close(desc2);
6641 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6642 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6643 
6644 	/* Cannot claim without a key */
6645 	desc = NULL;
6646 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6647 	CU_ASSERT(rc == 0);
6648 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6649 	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6650 	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED, &opts,
6651 					      &bdev_ut_if);
6652 	CU_ASSERT(rc == -EINVAL);
6653 	spdk_bdev_close(desc);
6654 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6655 	CU_ASSERT(TAILQ_EMPTY(&bdev->internal.open_descs));
6656 
6657 	/* Clean up */
6658 	free_bdev(bdev);
6659 }
6660 
6661 static void
6662 claim_v2_existing_writer(void)
6663 {
6664 	struct spdk_bdev *bdev;
6665 	struct spdk_bdev_desc *desc;
6666 	struct spdk_bdev_desc *desc2;
6667 	struct spdk_bdev_claim_opts opts;
6668 	enum spdk_bdev_claim_type type;
6669 	enum spdk_bdev_claim_type types[] = {
6670 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE,
6671 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED,
6672 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE
6673 	};
6674 	size_t i;
6675 	int rc;
6676 
6677 	bdev = allocate_bdev("bdev0");
6678 
6679 	desc = NULL;
6680 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc);
6681 	CU_ASSERT(rc == 0);
6682 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6683 	desc2 = NULL;
6684 	rc = spdk_bdev_open_ext("bdev0", true, bdev_ut_event_cb, NULL, &desc2);
6685 	CU_ASSERT(rc == 0);
6686 	SPDK_CU_ASSERT_FATAL(desc2 != NULL);
6687 
6688 	for (i = 0; i < SPDK_COUNTOF(types); i++) {
6689 		type = types[i];
6690 		spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6691 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED) {
6692 			opts.shared_claim_key = (uint64_t)&opts;
6693 		}
6694 		rc = spdk_bdev_module_claim_bdev_desc(desc, type, &opts, &bdev_ut_if);
6695 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE) {
6696 			CU_ASSERT(rc == -EINVAL);
6697 		} else {
6698 			CU_ASSERT(rc == -EPERM);
6699 		}
6700 		CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6701 		rc = spdk_bdev_module_claim_bdev_desc(desc2, type, &opts, &bdev_ut_if);
6702 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE) {
6703 			CU_ASSERT(rc == -EINVAL);
6704 		} else {
6705 			CU_ASSERT(rc == -EPERM);
6706 		}
6707 		CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE);
6708 	}
6709 
6710 	spdk_bdev_close(desc);
6711 	spdk_bdev_close(desc2);
6712 
6713 	/* Clean up */
6714 	free_bdev(bdev);
6715 }
6716 
6717 static void
6718 claim_v2_existing_v1(void)
6719 {
6720 	struct spdk_bdev *bdev;
6721 	struct spdk_bdev_desc *desc;
6722 	struct spdk_bdev_claim_opts opts;
6723 	enum spdk_bdev_claim_type type;
6724 	enum spdk_bdev_claim_type types[] = {
6725 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE,
6726 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED,
6727 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE
6728 	};
6729 	size_t i;
6730 	int rc;
6731 
6732 	bdev = allocate_bdev("bdev0");
6733 
6734 	rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6735 	CU_ASSERT(rc == 0);
6736 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
6737 
6738 	desc = NULL;
6739 	rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6740 	CU_ASSERT(rc == 0);
6741 	SPDK_CU_ASSERT_FATAL(desc != NULL);
6742 
6743 	for (i = 0; i < SPDK_COUNTOF(types); i++) {
6744 		type = types[i];
6745 		spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6746 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED) {
6747 			opts.shared_claim_key = (uint64_t)&opts;
6748 		}
6749 		rc = spdk_bdev_module_claim_bdev_desc(desc, type, &opts, &bdev_ut_if);
6750 		CU_ASSERT(rc == -EPERM);
6751 		CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
6752 	}
6753 
6754 	spdk_bdev_module_release_bdev(bdev);
6755 	spdk_bdev_close(desc);
6756 
6757 	/* Clean up */
6758 	free_bdev(bdev);
6759 }
6760 
6761 static void
6762 claim_v1_existing_v2(void)
6763 {
6764 	struct spdk_bdev *bdev;
6765 	struct spdk_bdev_desc *desc;
6766 	struct spdk_bdev_claim_opts opts;
6767 	enum spdk_bdev_claim_type type;
6768 	enum spdk_bdev_claim_type types[] = {
6769 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE,
6770 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED,
6771 		SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE
6772 	};
6773 	size_t i;
6774 	int rc;
6775 
6776 	bdev = allocate_bdev("bdev0");
6777 
6778 	for (i = 0; i < SPDK_COUNTOF(types); i++) {
6779 		type = types[i];
6780 
6781 		desc = NULL;
6782 		rc = spdk_bdev_open_ext("bdev0", false, bdev_ut_event_cb, NULL, &desc);
6783 		CU_ASSERT(rc == 0);
6784 		SPDK_CU_ASSERT_FATAL(desc != NULL);
6785 
6786 		/* Get a v2 claim */
6787 		spdk_bdev_claim_opts_init(&opts, sizeof(opts));
6788 		if (type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED) {
6789 			opts.shared_claim_key = (uint64_t)&opts;
6790 		}
6791 		rc = spdk_bdev_module_claim_bdev_desc(desc, type, &opts, &bdev_ut_if);
6792 		CU_ASSERT(rc == 0);
6793 
6794 		/* Fail to get a v1 claim */
6795 		rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6796 		CU_ASSERT(rc == -EPERM);
6797 
6798 		spdk_bdev_close(desc);
6799 
6800 		/* Now v1 succeeds */
6801 		rc = spdk_bdev_module_claim_bdev(bdev, NULL, &bdev_ut_if);
6802 		CU_ASSERT(rc == 0);
6803 		spdk_bdev_module_release_bdev(bdev);
6804 	}
6805 
6806 	/* Clean up */
6807 	free_bdev(bdev);
6808 }
6809 
6810 static void ut_examine_claimed_config0(struct spdk_bdev *bdev);
6811 static void ut_examine_claimed_disk0(struct spdk_bdev *bdev);
6812 static void ut_examine_claimed_config1(struct spdk_bdev *bdev);
6813 static void ut_examine_claimed_disk1(struct spdk_bdev *bdev);
6814 
6815 #define UT_MAX_EXAMINE_MODS 2
6816 struct spdk_bdev_module examine_claimed_mods[UT_MAX_EXAMINE_MODS] = {
6817 	{
6818 		.name = "vbdev_ut_examine0",
6819 		.module_init = vbdev_ut_module_init,
6820 		.module_fini = vbdev_ut_module_fini,
6821 		.examine_config = ut_examine_claimed_config0,
6822 		.examine_disk = ut_examine_claimed_disk0,
6823 	},
6824 	{
6825 		.name = "vbdev_ut_examine1",
6826 		.module_init = vbdev_ut_module_init,
6827 		.module_fini = vbdev_ut_module_fini,
6828 		.examine_config = ut_examine_claimed_config1,
6829 		.examine_disk = ut_examine_claimed_disk1,
6830 	}
6831 };
6832 
6833 SPDK_BDEV_MODULE_REGISTER(bdev_ut_claimed0, &examine_claimed_mods[0])
6834 SPDK_BDEV_MODULE_REGISTER(bdev_ut_claimed1, &examine_claimed_mods[1])
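
/*
 * Registration order matters for examine_claimed(): modules with examine callbacks
 * are inserted at the head of the bdev module list (see spdk_bdev_module_list_add()
 * in bdev.c), so vbdev_ut_examine1 runs examine before vbdev_ut_examine0. This is
 * why the conflicting-claims case below expects the later-registered module to win.
 */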
6835 
6836 struct ut_examine_claimed_ctx {
6837 	uint32_t examine_config_count;
6838 	uint32_t examine_disk_count;
6839 
6840 	/* Claim type to take, with these options */
6841 	enum spdk_bdev_claim_type claim_type;
6842 	struct spdk_bdev_claim_opts claim_opts;
6843 
6844 	/* Expected return value from spdk_bdev_module_claim_bdev_desc() */
6845 	int expect_claim_err;
6846 
6847 	/* Descriptor used for a claim */
6848 	struct spdk_bdev_desc *desc;
6849 } examine_claimed_ctx[UT_MAX_EXAMINE_MODS];
6850 
6851 bool ut_testing_examine_claimed;
6852 
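/* Close any descriptors used for claims and reset each per-module context between cases */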
6853 static void
6854 reset_examine_claimed_ctx(void)
6855 {
6856 	struct ut_examine_claimed_ctx *ctx;
6857 	uint32_t i;
6858 
6859 	for (i = 0; i < SPDK_COUNTOF(examine_claimed_ctx); i++) {
6860 		ctx = &examine_claimed_ctx[i];
6861 		if (ctx->desc != NULL) {
6862 			spdk_bdev_close(ctx->desc);
6863 		}
6864 		memset(ctx, 0, sizeof(*ctx));
6865 		spdk_bdev_claim_opts_init(&ctx->claim_opts, sizeof(ctx->claim_opts));
6866 	}
6867 }
6868 
6869 static void
6870 examine_claimed_config(struct spdk_bdev *bdev, uint32_t modnum)
6871 {
6872 	struct spdk_bdev_module *module = &examine_claimed_mods[modnum];
6873 	struct ut_examine_claimed_ctx *ctx = &examine_claimed_ctx[modnum];
6874 	int rc;
6875 
6876 	SPDK_CU_ASSERT_FATAL(modnum < UT_MAX_EXAMINE_MODS);

6877 	if (!ut_testing_examine_claimed) {
6878 		spdk_bdev_module_examine_done(module);
6879 		return;
6880 	}
6881 
6882 	ctx->examine_config_count++;
6883 
6884 	if (ctx->claim_type != SPDK_BDEV_CLAIM_NONE) {
6885 		rc = spdk_bdev_open_ext(bdev->name, false, bdev_ut_event_cb, NULL, &ctx->desc);
6886 		CU_ASSERT(rc == 0);
6887 
6888 		rc = spdk_bdev_module_claim_bdev_desc(ctx->desc, ctx->claim_type, &ctx->claim_opts,
6889 						      module);
6890 		CU_ASSERT(rc == ctx->expect_claim_err);
6891 	}
6892 	spdk_bdev_module_examine_done(module);
6893 }
6894 
6895 static void
6896 ut_examine_claimed_config0(struct spdk_bdev *bdev)
6897 {
6898 	examine_claimed_config(bdev, 0);
6899 }
6900 
6901 static void
6902 ut_examine_claimed_config1(struct spdk_bdev *bdev)
6903 {
6904 	examine_claimed_config(bdev, 1);
6905 }
6906 
6907 static void
6908 examine_claimed_disk(struct spdk_bdev *bdev, uint32_t modnum)
6909 {
6910 	struct spdk_bdev_module *module = &examine_claimed_mods[modnum];
6911 	struct ut_examine_claimed_ctx *ctx = &examine_claimed_ctx[modnum];
6912 
6913 	SPDK_CU_ASSERT_FATAL(modnum < UT_MAX_EXAMINE_MODS);

6914 	if (!ut_testing_examine_claimed) {
6915 		spdk_bdev_module_examine_done(module);
6916 		return;
6917 	}
6918 
6919 	ctx->examine_disk_count++;
6920 
6921 	spdk_bdev_module_examine_done(module);
6922 }
6923 
6924 static void
6925 ut_examine_claimed_disk0(struct spdk_bdev *bdev)
6926 {
6927 	examine_claimed_disk(bdev, 0);
6928 }
6929 
6930 static void
6931 ut_examine_claimed_disk1(struct spdk_bdev *bdev)
6932 {
6933 	examine_claimed_disk(bdev, 1);
6934 }
6935 
6936 static void
6937 examine_claimed(void)
6938 {
6939 	struct spdk_bdev *bdev;
6940 	struct spdk_bdev_module *mod = examine_claimed_mods;
6941 	struct ut_examine_claimed_ctx *ctx = examine_claimed_ctx;
6942 
6943 	ut_testing_examine_claimed = true;
6944 	reset_examine_claimed_ctx();
6945 
6946 	/*
6947 	 * With one module claiming, both modules' examine_config should be called, but only the
6948 	 * claiming module's examine_disk should be called.
6949 	 */
6950 	ctx[0].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
6951 	bdev = allocate_bdev("bdev0");
6952 	CU_ASSERT(ctx[0].examine_config_count == 1);
6953 	CU_ASSERT(ctx[0].examine_disk_count == 1);
6954 	SPDK_CU_ASSERT_FATAL(ctx[0].desc != NULL);
6955 	CU_ASSERT(ctx[0].desc->claim->module == &mod[0]);
6956 	CU_ASSERT(ctx[1].examine_config_count == 1);
6957 	CU_ASSERT(ctx[1].examine_disk_count == 0);
6958 	CU_ASSERT(ctx[1].desc == NULL);
6959 	reset_examine_claimed_ctx();
6960 	free_bdev(bdev);
6961 
6962 	/*
6963 	 * With two modules claiming, both modules' examine_config and examine_disk should be
6964 	 * called.
6965 	 */
6966 	ctx[0].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
6967 	ctx[1].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
6968 	bdev = allocate_bdev("bdev0");
6969 	CU_ASSERT(ctx[0].examine_config_count == 1);
6970 	CU_ASSERT(ctx[0].examine_disk_count == 1);
6971 	SPDK_CU_ASSERT_FATAL(ctx[0].desc != NULL);
6972 	CU_ASSERT(ctx[0].desc->claim->module == &mod[0]);
6973 	CU_ASSERT(ctx[1].examine_config_count == 1);
6974 	CU_ASSERT(ctx[1].examine_disk_count == 1);
6975 	SPDK_CU_ASSERT_FATAL(ctx[1].desc != NULL);
6976 	CU_ASSERT(ctx[1].desc->claim->module == &mod[1]);
6977 	reset_examine_claimed_ctx();
6978 	free_bdev(bdev);
6979 
6980 	/*
6981 	 * If two vbdev modules try to claim with conflicting claim types, the module that was added
6982 	 * last wins. The winner gets the claim and is the only one that has its examine_disk
6983 	 * callback invoked.
6984 	 */
6985 	ctx[0].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE;
6986 	ctx[0].expect_claim_err = -EPERM;
6987 	ctx[1].claim_type = SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE;
6988 	bdev = allocate_bdev("bdev0");
6989 	CU_ASSERT(ctx[0].examine_config_count == 1);
6990 	CU_ASSERT(ctx[0].examine_disk_count == 0);
6991 	CU_ASSERT(ctx[1].examine_config_count == 1);
6992 	CU_ASSERT(ctx[1].examine_disk_count == 1);
6993 	SPDK_CU_ASSERT_FATAL(ctx[1].desc != NULL);
6994 	CU_ASSERT(ctx[1].desc->claim->module == &mod[1]);
6995 	CU_ASSERT(bdev->internal.claim_type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
6996 	reset_examine_claimed_ctx();
6997 	free_bdev(bdev);
6998 
6999 	ut_testing_examine_claimed = false;
7000 }
7001 
7002 int
7003 main(int argc, char **argv)
7004 {
7005 	CU_pSuite		suite = NULL;
7006 	unsigned int		num_failures;
7007 
7008 	CU_set_error_action(CUEA_ABORT);
7009 	CU_initialize_registry();
7010 
7011 	suite = CU_add_suite("bdev", null_init, null_clean);
7012 
7013 	CU_ADD_TEST(suite, bytes_to_blocks_test);
7014 	CU_ADD_TEST(suite, num_blocks_test);
7015 	CU_ADD_TEST(suite, io_valid_test);
7016 	CU_ADD_TEST(suite, open_write_test);
7017 	CU_ADD_TEST(suite, claim_test);
7018 	CU_ADD_TEST(suite, alias_add_del_test);
7019 	CU_ADD_TEST(suite, get_device_stat_test);
7020 	CU_ADD_TEST(suite, bdev_io_types_test);
7021 	CU_ADD_TEST(suite, bdev_io_wait_test);
7022 	CU_ADD_TEST(suite, bdev_io_spans_split_test);
7023 	CU_ADD_TEST(suite, bdev_io_boundary_split_test);
7024 	CU_ADD_TEST(suite, bdev_io_max_size_and_segment_split_test);
7025 	CU_ADD_TEST(suite, bdev_io_mix_split_test);
7026 	CU_ADD_TEST(suite, bdev_io_split_with_io_wait);
7027 	CU_ADD_TEST(suite, bdev_io_write_unit_split_test);
7028 	CU_ADD_TEST(suite, bdev_io_alignment_with_boundary);
7029 	CU_ADD_TEST(suite, bdev_io_alignment);
7030 	CU_ADD_TEST(suite, bdev_histograms);
7031 	CU_ADD_TEST(suite, bdev_write_zeroes);
7032 	CU_ADD_TEST(suite, bdev_compare_and_write);
7033 	CU_ADD_TEST(suite, bdev_compare);
7034 	CU_ADD_TEST(suite, bdev_compare_emulated);
7035 	CU_ADD_TEST(suite, bdev_zcopy_write);
7036 	CU_ADD_TEST(suite, bdev_zcopy_read);
7037 	CU_ADD_TEST(suite, bdev_open_while_hotremove);
7038 	CU_ADD_TEST(suite, bdev_close_while_hotremove);
7039 	CU_ADD_TEST(suite, bdev_open_ext);
7040 	CU_ADD_TEST(suite, bdev_open_ext_unregister);
7041 	CU_ADD_TEST(suite, bdev_set_io_timeout);
7042 	CU_ADD_TEST(suite, bdev_set_qd_sampling);
7043 	CU_ADD_TEST(suite, lba_range_overlap);
7044 	CU_ADD_TEST(suite, lock_lba_range_check_ranges);
7045 	CU_ADD_TEST(suite, lock_lba_range_with_io_outstanding);
7046 	CU_ADD_TEST(suite, lock_lba_range_overlapped);
7047 	CU_ADD_TEST(suite, bdev_io_abort);
7048 	CU_ADD_TEST(suite, bdev_unmap);
7049 	CU_ADD_TEST(suite, bdev_write_zeroes_split_test);
7050 	CU_ADD_TEST(suite, bdev_set_options_test);
7051 	CU_ADD_TEST(suite, bdev_multi_allocation);
7052 	CU_ADD_TEST(suite, bdev_get_memory_domains);
7053 	CU_ADD_TEST(suite, bdev_io_ext);
7054 	CU_ADD_TEST(suite, bdev_io_ext_no_opts);
7055 	CU_ADD_TEST(suite, bdev_io_ext_invalid_opts);
7056 	CU_ADD_TEST(suite, bdev_io_ext_split);
7057 	CU_ADD_TEST(suite, bdev_io_ext_bounce_buffer);
7058 	CU_ADD_TEST(suite, bdev_register_uuid_alias);
7059 	CU_ADD_TEST(suite, bdev_unregister_by_name);
7060 	CU_ADD_TEST(suite, for_each_bdev_test);
7061 	CU_ADD_TEST(suite, bdev_seek_test);
7062 	CU_ADD_TEST(suite, bdev_copy);
7063 	CU_ADD_TEST(suite, bdev_copy_split_test);
7064 	CU_ADD_TEST(suite, examine_locks);
7065 	CU_ADD_TEST(suite, claim_v2_rwo);
7066 	CU_ADD_TEST(suite, claim_v2_rom);
7067 	CU_ADD_TEST(suite, claim_v2_rwm);
7068 	CU_ADD_TEST(suite, claim_v2_existing_writer);
7069 	CU_ADD_TEST(suite, claim_v2_existing_v1);
7070 	CU_ADD_TEST(suite, claim_v1_existing_v2);
7071 	CU_ADD_TEST(suite, examine_claimed);
7072 
7073 	allocate_cores(1);
7074 	allocate_threads(1);
7075 	set_thread(0);
7076 
7077 	CU_basic_set_mode(CU_BRM_VERBOSE);
7078 	CU_basic_run_tests();
7079 	num_failures = CU_get_number_of_failures();
7080 	CU_cleanup_registry();
7081 
7082 	free_threads();
7083 	free_cores();
7084 
7085 	return num_failures;
7086 }
7087