1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "vbdev_delay.h" 37 #include "spdk/rpc.h" 38 #include "spdk/env.h" 39 #include "spdk/conf.h" 40 #include "spdk/endian.h" 41 #include "spdk/string.h" 42 #include "spdk/thread.h" 43 #include "spdk/util.h" 44 45 #include "spdk/bdev_module.h" 46 #include "spdk_internal/log.h" 47 48 49 static int vbdev_delay_init(void); 50 static int vbdev_delay_get_ctx_size(void); 51 static void vbdev_delay_examine(struct spdk_bdev *bdev); 52 static void vbdev_delay_finish(void); 53 static int vbdev_delay_config_json(struct spdk_json_write_ctx *w); 54 55 static struct spdk_bdev_module delay_if = { 56 .name = "delay", 57 .module_init = vbdev_delay_init, 58 .config_text = NULL, 59 .get_ctx_size = vbdev_delay_get_ctx_size, 60 .examine_config = vbdev_delay_examine, 61 .module_fini = vbdev_delay_finish, 62 .config_json = vbdev_delay_config_json 63 }; 64 65 SPDK_BDEV_MODULE_REGISTER(delay, &delay_if) 66 67 /* Associative list to be used in examine */ 68 struct bdev_association { 69 char *vbdev_name; 70 char *bdev_name; 71 uint64_t avg_read_latency; 72 uint64_t p99_read_latency; 73 uint64_t avg_write_latency; 74 uint64_t p99_write_latency; 75 TAILQ_ENTRY(bdev_association) link; 76 }; 77 static TAILQ_HEAD(, bdev_association) g_bdev_associations = TAILQ_HEAD_INITIALIZER( 78 g_bdev_associations); 79 80 /* List of virtual bdevs and associated info for each. */ 81 struct vbdev_delay { 82 struct spdk_bdev *base_bdev; /* the thing we're attaching to */ 83 struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ 84 struct spdk_bdev delay_bdev; /* the delay virtual bdev */ 85 uint64_t average_read_latency_ticks; /* the average read delay */ 86 uint64_t p99_read_latency_ticks; /* the p99 read delay */ 87 uint64_t average_write_latency_ticks; /* the average write delay */ 88 uint64_t p99_write_latency_ticks; /* the p99 write delay */ 89 TAILQ_ENTRY(vbdev_delay) link; 90 struct spdk_thread *thread; /* thread where base device is opened */ 91 }; 92 static TAILQ_HEAD(, vbdev_delay) g_delay_nodes = TAILQ_HEAD_INITIALIZER(g_delay_nodes); 93 94 struct delay_bdev_io { 95 int status; 96 97 uint64_t completion_tick; 98 99 enum delay_io_type type; 100 101 struct spdk_io_channel *ch; 102 103 struct spdk_bdev_io_wait_entry bdev_io_wait; 104 105 STAILQ_ENTRY(delay_bdev_io) link; 106 }; 107 108 struct delay_io_channel { 109 struct spdk_io_channel *base_ch; /* IO channel of base device */ 110 STAILQ_HEAD(, delay_bdev_io) avg_read_io; 111 STAILQ_HEAD(, delay_bdev_io) p99_read_io; 112 STAILQ_HEAD(, delay_bdev_io) avg_write_io; 113 STAILQ_HEAD(, delay_bdev_io) p99_write_io; 114 struct spdk_poller *io_poller; 115 unsigned int rand_seed; 116 }; 117 118 static void 119 vbdev_delay_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); 120 121 122 /* Callback for unregistering the IO device. */ 123 static void 124 _device_unregister_cb(void *io_device) 125 { 126 struct vbdev_delay *delay_node = io_device; 127 128 /* Done with this delay_node. */ 129 free(delay_node->delay_bdev.name); 130 free(delay_node); 131 } 132 133 static void 134 _vbdev_delay_destruct(void *ctx) 135 { 136 struct spdk_bdev_desc *desc = ctx; 137 138 spdk_bdev_close(desc); 139 } 140 141 static int 142 vbdev_delay_destruct(void *ctx) 143 { 144 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 145 146 /* It is important to follow this exact sequence of steps for destroying 147 * a vbdev... 148 */ 149 150 TAILQ_REMOVE(&g_delay_nodes, delay_node, link); 151 152 /* Unclaim the underlying bdev. */ 153 spdk_bdev_module_release_bdev(delay_node->base_bdev); 154 155 /* Close the underlying bdev on its same opened thread. */ 156 if (delay_node->thread && delay_node->thread != spdk_get_thread()) { 157 spdk_thread_send_msg(delay_node->thread, _vbdev_delay_destruct, delay_node->base_desc); 158 } else { 159 spdk_bdev_close(delay_node->base_desc); 160 } 161 162 /* Unregister the io_device. */ 163 spdk_io_device_unregister(delay_node, _device_unregister_cb); 164 165 return 0; 166 } 167 168 static void 169 _process_io_stailq(void *arg, uint64_t ticks) 170 { 171 STAILQ_HEAD(, delay_bdev_io) *head = arg; 172 struct delay_bdev_io *io_ctx, *tmp; 173 174 STAILQ_FOREACH_SAFE(io_ctx, head, link, tmp) { 175 if (io_ctx->completion_tick <= ticks) { 176 STAILQ_REMOVE(head, io_ctx, delay_bdev_io, link); 177 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(io_ctx), io_ctx->status); 178 } else { 179 /* In the general case, I/O will become ready in an fifo order. When timeouts are dynamically 180 * changed, this is not necessarily the case. However, the normal behavior will be restored 181 * after the outstanding I/O at the time of the change have been completed. 182 * This essentially means that moving from a high to low latency creates a dam for the new I/O 183 * submitted after the latency change. This is considered desirable behavior for the use case where 184 * we are trying to trigger a pre-defined timeout on an initiator. 185 */ 186 break; 187 } 188 } 189 } 190 191 static int 192 _delay_finish_io(void *arg) 193 { 194 struct delay_io_channel *delay_ch = arg; 195 uint64_t ticks = spdk_get_ticks(); 196 197 _process_io_stailq(&delay_ch->avg_read_io, ticks); 198 _process_io_stailq(&delay_ch->avg_write_io, ticks); 199 _process_io_stailq(&delay_ch->p99_read_io, ticks); 200 _process_io_stailq(&delay_ch->p99_write_io, ticks); 201 202 return 0; 203 } 204 205 /* Completion callback for IO that were issued from this bdev. The original bdev_io 206 * is passed in as an arg so we'll complete that one with the appropriate status 207 * and then free the one that this module issued. 208 */ 209 static void 210 _delay_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 211 { 212 struct spdk_bdev_io *orig_io = cb_arg; 213 struct vbdev_delay *delay_node = SPDK_CONTAINEROF(orig_io->bdev, struct vbdev_delay, delay_bdev); 214 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)orig_io->driver_ctx; 215 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(io_ctx->ch); 216 217 io_ctx->status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 218 spdk_bdev_free_io(bdev_io); 219 220 /* Put the I/O into the proper list for processing by the channel poller. */ 221 switch (io_ctx->type) { 222 case DELAY_AVG_READ: 223 io_ctx->completion_tick = spdk_get_ticks() + delay_node->average_read_latency_ticks; 224 STAILQ_INSERT_TAIL(&delay_ch->avg_read_io, io_ctx, link); 225 break; 226 case DELAY_AVG_WRITE: 227 io_ctx->completion_tick = spdk_get_ticks() + delay_node->average_write_latency_ticks; 228 STAILQ_INSERT_TAIL(&delay_ch->avg_write_io, io_ctx, link); 229 break; 230 case DELAY_P99_READ: 231 io_ctx->completion_tick = spdk_get_ticks() + delay_node->p99_read_latency_ticks; 232 STAILQ_INSERT_TAIL(&delay_ch->p99_read_io, io_ctx, link); 233 break; 234 case DELAY_P99_WRITE: 235 io_ctx->completion_tick = spdk_get_ticks() + delay_node->p99_write_latency_ticks; 236 STAILQ_INSERT_TAIL(&delay_ch->p99_write_io, io_ctx, link); 237 break; 238 case DELAY_NONE: 239 default: 240 spdk_bdev_io_complete(orig_io, io_ctx->status); 241 break; 242 } 243 } 244 245 static void 246 vbdev_delay_resubmit_io(void *arg) 247 { 248 struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg; 249 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)bdev_io->driver_ctx; 250 251 vbdev_delay_submit_request(io_ctx->ch, bdev_io); 252 } 253 254 static void 255 vbdev_delay_queue_io(struct spdk_bdev_io *bdev_io) 256 { 257 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)bdev_io->driver_ctx; 258 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(io_ctx->ch); 259 int rc; 260 261 io_ctx->bdev_io_wait.bdev = bdev_io->bdev; 262 io_ctx->bdev_io_wait.cb_fn = vbdev_delay_resubmit_io; 263 io_ctx->bdev_io_wait.cb_arg = bdev_io; 264 265 rc = spdk_bdev_queue_io_wait(bdev_io->bdev, delay_ch->base_ch, &io_ctx->bdev_io_wait); 266 if (rc != 0) { 267 SPDK_ERRLOG("Queue io failed in vbdev_delay_queue_io, rc=%d.\n", rc); 268 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 269 } 270 } 271 272 static void 273 delay_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success) 274 { 275 struct vbdev_delay *delay_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_delay, 276 delay_bdev); 277 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 278 int rc; 279 280 if (!success) { 281 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 282 return; 283 } 284 285 rc = spdk_bdev_readv_blocks(delay_node->base_desc, delay_ch->base_ch, bdev_io->u.bdev.iovs, 286 bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, 287 bdev_io->u.bdev.num_blocks, _delay_complete_io, 288 bdev_io); 289 290 if (rc == -ENOMEM) { 291 SPDK_ERRLOG("No memory, start to queue io for delay.\n"); 292 vbdev_delay_queue_io(bdev_io); 293 } else if (rc != 0) { 294 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 295 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 296 } 297 } 298 299 static void 300 vbdev_delay_reset_dev(struct spdk_io_channel_iter *i, int status) 301 { 302 struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i); 303 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 304 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 305 struct vbdev_delay *delay_node = spdk_io_channel_iter_get_io_device(i); 306 int rc; 307 308 rc = spdk_bdev_reset(delay_node->base_desc, delay_ch->base_ch, 309 _delay_complete_io, bdev_io); 310 311 if (rc == -ENOMEM) { 312 SPDK_ERRLOG("No memory, start to queue io for delay.\n"); 313 vbdev_delay_queue_io(bdev_io); 314 } else if (rc != 0) { 315 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 316 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 317 } 318 } 319 320 static void 321 _abort_all_delayed_io(void *arg) 322 { 323 STAILQ_HEAD(, delay_bdev_io) *head = arg; 324 struct delay_bdev_io *io_ctx, *tmp; 325 326 STAILQ_FOREACH_SAFE(io_ctx, head, link, tmp) { 327 STAILQ_REMOVE(head, io_ctx, delay_bdev_io, link); 328 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(io_ctx), SPDK_BDEV_IO_STATUS_ABORTED); 329 } 330 } 331 332 static void 333 vbdev_delay_reset_channel(struct spdk_io_channel_iter *i) 334 { 335 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 336 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 337 338 _abort_all_delayed_io(&delay_ch->avg_read_io); 339 _abort_all_delayed_io(&delay_ch->avg_write_io); 340 _abort_all_delayed_io(&delay_ch->p99_read_io); 341 _abort_all_delayed_io(&delay_ch->p99_write_io); 342 343 spdk_for_each_channel_continue(i, 0); 344 } 345 346 static bool 347 abort_delayed_io(void *_head, struct spdk_bdev_io *bio_to_abort) 348 { 349 STAILQ_HEAD(, delay_bdev_io) *head = _head; 350 struct delay_bdev_io *io_ctx_to_abort = (struct delay_bdev_io *)bio_to_abort->driver_ctx; 351 struct delay_bdev_io *io_ctx; 352 353 STAILQ_FOREACH(io_ctx, head, link) { 354 if (io_ctx == io_ctx_to_abort) { 355 STAILQ_REMOVE(head, io_ctx_to_abort, delay_bdev_io, link); 356 spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED); 357 return true; 358 } 359 } 360 361 return false; 362 } 363 364 static int 365 vbdev_delay_abort(struct vbdev_delay *delay_node, struct delay_io_channel *delay_ch, 366 struct spdk_bdev_io *bdev_io) 367 { 368 struct spdk_bdev_io *bio_to_abort = bdev_io->u.abort.bio_to_abort; 369 370 if (abort_delayed_io(&delay_ch->avg_read_io, bio_to_abort) || 371 abort_delayed_io(&delay_ch->avg_write_io, bio_to_abort) || 372 abort_delayed_io(&delay_ch->p99_read_io, bio_to_abort) || 373 abort_delayed_io(&delay_ch->p99_write_io, bio_to_abort)) { 374 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 375 return 0; 376 } 377 378 return spdk_bdev_abort(delay_node->base_desc, delay_ch->base_ch, bio_to_abort, 379 _delay_complete_io, bdev_io); 380 } 381 382 static void 383 vbdev_delay_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 384 { 385 struct vbdev_delay *delay_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_delay, delay_bdev); 386 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 387 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)bdev_io->driver_ctx; 388 int rc = 0; 389 bool is_p99; 390 391 is_p99 = rand_r(&delay_ch->rand_seed) % 100 == 0 ? true : false; 392 393 io_ctx->ch = ch; 394 io_ctx->type = DELAY_NONE; 395 396 switch (bdev_io->type) { 397 case SPDK_BDEV_IO_TYPE_READ: 398 io_ctx->type = is_p99 ? DELAY_P99_READ : DELAY_AVG_READ; 399 spdk_bdev_io_get_buf(bdev_io, delay_read_get_buf_cb, 400 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 401 break; 402 case SPDK_BDEV_IO_TYPE_WRITE: 403 io_ctx->type = is_p99 ? DELAY_P99_WRITE : DELAY_AVG_WRITE; 404 rc = spdk_bdev_writev_blocks(delay_node->base_desc, delay_ch->base_ch, bdev_io->u.bdev.iovs, 405 bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, 406 bdev_io->u.bdev.num_blocks, _delay_complete_io, 407 bdev_io); 408 break; 409 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 410 rc = spdk_bdev_write_zeroes_blocks(delay_node->base_desc, delay_ch->base_ch, 411 bdev_io->u.bdev.offset_blocks, 412 bdev_io->u.bdev.num_blocks, 413 _delay_complete_io, bdev_io); 414 break; 415 case SPDK_BDEV_IO_TYPE_UNMAP: 416 rc = spdk_bdev_unmap_blocks(delay_node->base_desc, delay_ch->base_ch, 417 bdev_io->u.bdev.offset_blocks, 418 bdev_io->u.bdev.num_blocks, 419 _delay_complete_io, bdev_io); 420 break; 421 case SPDK_BDEV_IO_TYPE_FLUSH: 422 rc = spdk_bdev_flush_blocks(delay_node->base_desc, delay_ch->base_ch, 423 bdev_io->u.bdev.offset_blocks, 424 bdev_io->u.bdev.num_blocks, 425 _delay_complete_io, bdev_io); 426 break; 427 case SPDK_BDEV_IO_TYPE_RESET: 428 /* During reset, the generic bdev layer aborts all new I/Os and queues all new resets. 429 * Hence we can simply abort all I/Os delayed to complete. 430 */ 431 spdk_for_each_channel(delay_node, vbdev_delay_reset_channel, bdev_io, 432 vbdev_delay_reset_dev); 433 break; 434 case SPDK_BDEV_IO_TYPE_ABORT: 435 rc = vbdev_delay_abort(delay_node, delay_ch, bdev_io); 436 break; 437 default: 438 SPDK_ERRLOG("delay: unknown I/O type %d\n", bdev_io->type); 439 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 440 return; 441 } 442 443 if (rc == -ENOMEM) { 444 SPDK_ERRLOG("No memory, start to queue io for delay.\n"); 445 vbdev_delay_queue_io(bdev_io); 446 } else if (rc != 0) { 447 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 448 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 449 } 450 } 451 452 static bool 453 vbdev_delay_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 454 { 455 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 456 457 if (io_type == SPDK_BDEV_IO_TYPE_ZCOPY) { 458 return false; 459 } else { 460 return spdk_bdev_io_type_supported(delay_node->base_bdev, io_type); 461 } 462 } 463 464 static struct spdk_io_channel * 465 vbdev_delay_get_io_channel(void *ctx) 466 { 467 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 468 struct spdk_io_channel *delay_ch = NULL; 469 470 delay_ch = spdk_get_io_channel(delay_node); 471 472 return delay_ch; 473 } 474 475 static void 476 _delay_write_conf_values(struct vbdev_delay *delay_node, struct spdk_json_write_ctx *w) 477 { 478 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&delay_node->delay_bdev)); 479 spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(delay_node->base_bdev)); 480 spdk_json_write_named_int64(w, "avg_read_latency", 481 delay_node->average_read_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 482 spdk_json_write_named_int64(w, "p99_read_latency", 483 delay_node->p99_read_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 484 spdk_json_write_named_int64(w, "avg_write_latency", 485 delay_node->average_write_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 486 spdk_json_write_named_int64(w, "p99_write_latency", 487 delay_node->p99_write_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 488 } 489 490 static int 491 vbdev_delay_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 492 { 493 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 494 495 spdk_json_write_name(w, "delay"); 496 spdk_json_write_object_begin(w); 497 _delay_write_conf_values(delay_node, w); 498 spdk_json_write_object_end(w); 499 500 return 0; 501 } 502 503 /* This is used to generate JSON that can configure this module to its current state. */ 504 static int 505 vbdev_delay_config_json(struct spdk_json_write_ctx *w) 506 { 507 struct vbdev_delay *delay_node; 508 509 TAILQ_FOREACH(delay_node, &g_delay_nodes, link) { 510 spdk_json_write_object_begin(w); 511 spdk_json_write_named_string(w, "method", "bdev_delay_create"); 512 spdk_json_write_named_object_begin(w, "params"); 513 _delay_write_conf_values(delay_node, w); 514 spdk_json_write_object_end(w); 515 } 516 return 0; 517 } 518 519 /* We provide this callback for the SPDK channel code to create a channel using 520 * the channel struct we provided in our module get_io_channel() entry point. Here 521 * we get and save off an underlying base channel of the device below us so that 522 * we can communicate with the base bdev on a per channel basis. If we needed 523 * our own poller for this vbdev, we'd register it here. 524 */ 525 static int 526 delay_bdev_ch_create_cb(void *io_device, void *ctx_buf) 527 { 528 struct delay_io_channel *delay_ch = ctx_buf; 529 struct vbdev_delay *delay_node = io_device; 530 531 STAILQ_INIT(&delay_ch->avg_read_io); 532 STAILQ_INIT(&delay_ch->p99_read_io); 533 STAILQ_INIT(&delay_ch->avg_write_io); 534 STAILQ_INIT(&delay_ch->p99_write_io); 535 536 delay_ch->io_poller = SPDK_POLLER_REGISTER(_delay_finish_io, delay_ch, 0); 537 delay_ch->base_ch = spdk_bdev_get_io_channel(delay_node->base_desc); 538 delay_ch->rand_seed = time(NULL); 539 540 return 0; 541 } 542 543 /* We provide this callback for the SPDK channel code to destroy a channel 544 * created with our create callback. We just need to undo anything we did 545 * when we created. If this bdev used its own poller, we'd unregsiter it here. 546 */ 547 static void 548 delay_bdev_ch_destroy_cb(void *io_device, void *ctx_buf) 549 { 550 struct delay_io_channel *delay_ch = ctx_buf; 551 552 spdk_poller_unregister(&delay_ch->io_poller); 553 spdk_put_io_channel(delay_ch->base_ch); 554 } 555 556 /* Create the delay association from the bdev and vbdev name and insert 557 * on the global list. */ 558 static int 559 vbdev_delay_insert_association(const char *bdev_name, const char *vbdev_name, 560 uint64_t avg_read_latency, uint64_t p99_read_latency, 561 uint64_t avg_write_latency, uint64_t p99_write_latency) 562 { 563 struct bdev_association *assoc; 564 565 TAILQ_FOREACH(assoc, &g_bdev_associations, link) { 566 if (strcmp(vbdev_name, assoc->vbdev_name) == 0) { 567 SPDK_ERRLOG("delay bdev %s already exists\n", vbdev_name); 568 return -EEXIST; 569 } 570 } 571 572 assoc = calloc(1, sizeof(struct bdev_association)); 573 if (!assoc) { 574 SPDK_ERRLOG("could not allocate bdev_association\n"); 575 return -ENOMEM; 576 } 577 578 assoc->bdev_name = strdup(bdev_name); 579 if (!assoc->bdev_name) { 580 SPDK_ERRLOG("could not allocate assoc->bdev_name\n"); 581 free(assoc); 582 return -ENOMEM; 583 } 584 585 assoc->vbdev_name = strdup(vbdev_name); 586 if (!assoc->vbdev_name) { 587 SPDK_ERRLOG("could not allocate assoc->vbdev_name\n"); 588 free(assoc->bdev_name); 589 free(assoc); 590 return -ENOMEM; 591 } 592 593 assoc->avg_read_latency = avg_read_latency; 594 assoc->p99_read_latency = p99_read_latency; 595 assoc->avg_write_latency = avg_write_latency; 596 assoc->p99_write_latency = p99_write_latency; 597 598 TAILQ_INSERT_TAIL(&g_bdev_associations, assoc, link); 599 600 return 0; 601 } 602 603 int 604 vbdev_delay_update_latency_value(char *delay_name, uint64_t latency_us, enum delay_io_type type) 605 { 606 struct spdk_bdev *delay_bdev; 607 struct vbdev_delay *delay_node; 608 uint64_t ticks_mhz = spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; 609 610 delay_bdev = spdk_bdev_get_by_name(delay_name); 611 if (delay_bdev == NULL) { 612 return -ENODEV; 613 } else if (delay_bdev->module != &delay_if) { 614 return -EINVAL; 615 } 616 617 delay_node = SPDK_CONTAINEROF(delay_bdev, struct vbdev_delay, delay_bdev); 618 619 switch (type) { 620 case DELAY_AVG_READ: 621 delay_node->average_read_latency_ticks = ticks_mhz * latency_us; 622 break; 623 case DELAY_AVG_WRITE: 624 delay_node->average_write_latency_ticks = ticks_mhz * latency_us; 625 break; 626 case DELAY_P99_READ: 627 delay_node->p99_read_latency_ticks = ticks_mhz * latency_us; 628 break; 629 case DELAY_P99_WRITE: 630 delay_node->p99_write_latency_ticks = ticks_mhz * latency_us; 631 break; 632 default: 633 return -EINVAL; 634 } 635 636 return 0; 637 } 638 639 static int 640 vbdev_delay_init(void) 641 { 642 /* Not allowing for .ini style configuration. */ 643 return 0; 644 } 645 646 static void 647 vbdev_delay_finish(void) 648 { 649 struct bdev_association *assoc; 650 651 while ((assoc = TAILQ_FIRST(&g_bdev_associations))) { 652 TAILQ_REMOVE(&g_bdev_associations, assoc, link); 653 free(assoc->bdev_name); 654 free(assoc->vbdev_name); 655 free(assoc); 656 } 657 } 658 659 static int 660 vbdev_delay_get_ctx_size(void) 661 { 662 return sizeof(struct delay_bdev_io); 663 } 664 665 static void 666 vbdev_delay_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 667 { 668 /* No config per bdev needed */ 669 } 670 671 /* When we register our bdev this is how we specify our entry points. */ 672 static const struct spdk_bdev_fn_table vbdev_delay_fn_table = { 673 .destruct = vbdev_delay_destruct, 674 .submit_request = vbdev_delay_submit_request, 675 .io_type_supported = vbdev_delay_io_type_supported, 676 .get_io_channel = vbdev_delay_get_io_channel, 677 .dump_info_json = vbdev_delay_dump_info_json, 678 .write_config_json = vbdev_delay_write_config_json, 679 }; 680 681 /* Called when the underlying base bdev goes away. */ 682 static void 683 vbdev_delay_base_bdev_hotremove_cb(void *ctx) 684 { 685 struct vbdev_delay *delay_node, *tmp; 686 struct spdk_bdev *bdev_find = ctx; 687 688 TAILQ_FOREACH_SAFE(delay_node, &g_delay_nodes, link, tmp) { 689 if (bdev_find == delay_node->base_bdev) { 690 spdk_bdev_unregister(&delay_node->delay_bdev, NULL, NULL); 691 } 692 } 693 } 694 695 /* Create and register the delay vbdev if we find it in our list of bdev names. 696 * This can be called either by the examine path or RPC method. 697 */ 698 static int 699 vbdev_delay_register(struct spdk_bdev *bdev) 700 { 701 struct bdev_association *assoc; 702 struct vbdev_delay *delay_node; 703 uint64_t ticks_mhz = spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; 704 int rc = 0; 705 706 /* Check our list of names from config versus this bdev and if 707 * there's a match, create the delay_node & bdev accordingly. 708 */ 709 TAILQ_FOREACH(assoc, &g_bdev_associations, link) { 710 if (strcmp(assoc->bdev_name, bdev->name) != 0) { 711 continue; 712 } 713 714 delay_node = calloc(1, sizeof(struct vbdev_delay)); 715 if (!delay_node) { 716 rc = -ENOMEM; 717 SPDK_ERRLOG("could not allocate delay_node\n"); 718 break; 719 } 720 721 /* The base bdev that we're attaching to. */ 722 delay_node->base_bdev = bdev; 723 delay_node->delay_bdev.name = strdup(assoc->vbdev_name); 724 if (!delay_node->delay_bdev.name) { 725 rc = -ENOMEM; 726 SPDK_ERRLOG("could not allocate delay_bdev name\n"); 727 free(delay_node); 728 break; 729 } 730 delay_node->delay_bdev.product_name = "delay"; 731 732 delay_node->delay_bdev.write_cache = bdev->write_cache; 733 delay_node->delay_bdev.required_alignment = bdev->required_alignment; 734 delay_node->delay_bdev.optimal_io_boundary = bdev->optimal_io_boundary; 735 delay_node->delay_bdev.blocklen = bdev->blocklen; 736 delay_node->delay_bdev.blockcnt = bdev->blockcnt; 737 738 delay_node->delay_bdev.ctxt = delay_node; 739 delay_node->delay_bdev.fn_table = &vbdev_delay_fn_table; 740 delay_node->delay_bdev.module = &delay_if; 741 742 /* Store the number of ticks you need to add to get the I/O expiration time. */ 743 delay_node->average_read_latency_ticks = ticks_mhz * assoc->avg_read_latency; 744 delay_node->p99_read_latency_ticks = ticks_mhz * assoc->p99_read_latency; 745 delay_node->average_write_latency_ticks = ticks_mhz * assoc->avg_write_latency; 746 delay_node->p99_write_latency_ticks = ticks_mhz * assoc->p99_write_latency; 747 748 spdk_io_device_register(delay_node, delay_bdev_ch_create_cb, delay_bdev_ch_destroy_cb, 749 sizeof(struct delay_io_channel), 750 assoc->vbdev_name); 751 752 rc = spdk_bdev_open(bdev, true, vbdev_delay_base_bdev_hotremove_cb, 753 bdev, &delay_node->base_desc); 754 if (rc) { 755 SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev)); 756 goto error_unregister; 757 } 758 759 /* Save the thread where the base device is opened */ 760 delay_node->thread = spdk_get_thread(); 761 762 rc = spdk_bdev_module_claim_bdev(bdev, delay_node->base_desc, delay_node->delay_bdev.module); 763 if (rc) { 764 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev)); 765 goto error_close; 766 } 767 768 rc = spdk_bdev_register(&delay_node->delay_bdev); 769 if (rc) { 770 SPDK_ERRLOG("could not register delay_bdev\n"); 771 spdk_bdev_module_release_bdev(delay_node->base_bdev); 772 goto error_close; 773 } 774 775 TAILQ_INSERT_TAIL(&g_delay_nodes, delay_node, link); 776 } 777 778 return rc; 779 780 error_close: 781 spdk_bdev_close(delay_node->base_desc); 782 error_unregister: 783 spdk_io_device_unregister(delay_node, NULL); 784 free(delay_node->delay_bdev.name); 785 free(delay_node); 786 return rc; 787 } 788 789 int 790 create_delay_disk(const char *bdev_name, const char *vbdev_name, uint64_t avg_read_latency, 791 uint64_t p99_read_latency, uint64_t avg_write_latency, uint64_t p99_write_latency) 792 { 793 struct spdk_bdev *bdev = NULL; 794 int rc = 0; 795 796 if (p99_read_latency < avg_read_latency || p99_write_latency < avg_write_latency) { 797 SPDK_ERRLOG("Unable to create a delay bdev where p99 latency is less than average latency.\n"); 798 return -EINVAL; 799 } 800 801 rc = vbdev_delay_insert_association(bdev_name, vbdev_name, avg_read_latency, p99_read_latency, 802 avg_write_latency, p99_write_latency); 803 if (rc) { 804 return rc; 805 } 806 807 bdev = spdk_bdev_get_by_name(bdev_name); 808 if (!bdev) { 809 return 0; 810 } 811 812 return vbdev_delay_register(bdev); 813 } 814 815 void 816 delete_delay_disk(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg) 817 { 818 struct bdev_association *assoc; 819 820 if (!bdev || bdev->module != &delay_if) { 821 cb_fn(cb_arg, -ENODEV); 822 return; 823 } 824 825 TAILQ_FOREACH(assoc, &g_bdev_associations, link) { 826 if (strcmp(assoc->vbdev_name, bdev->name) == 0) { 827 TAILQ_REMOVE(&g_bdev_associations, assoc, link); 828 free(assoc->bdev_name); 829 free(assoc->vbdev_name); 830 free(assoc); 831 break; 832 } 833 } 834 835 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 836 } 837 838 static void 839 vbdev_delay_examine(struct spdk_bdev *bdev) 840 { 841 vbdev_delay_register(bdev); 842 843 spdk_bdev_module_examine_done(&delay_if); 844 } 845 846 SPDK_LOG_REGISTER_COMPONENT("vbdev_delay", SPDK_LOG_VBDEV_DELAY) 847