1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "vbdev_delay.h" 37 #include "spdk/rpc.h" 38 #include "spdk/env.h" 39 #include "spdk/endian.h" 40 #include "spdk/string.h" 41 #include "spdk/thread.h" 42 #include "spdk/util.h" 43 44 #include "spdk/bdev_module.h" 45 #include "spdk/log.h" 46 47 48 static int vbdev_delay_init(void); 49 static int vbdev_delay_get_ctx_size(void); 50 static void vbdev_delay_examine(struct spdk_bdev *bdev); 51 static void vbdev_delay_finish(void); 52 static int vbdev_delay_config_json(struct spdk_json_write_ctx *w); 53 54 static struct spdk_bdev_module delay_if = { 55 .name = "delay", 56 .module_init = vbdev_delay_init, 57 .config_text = NULL, 58 .get_ctx_size = vbdev_delay_get_ctx_size, 59 .examine_config = vbdev_delay_examine, 60 .module_fini = vbdev_delay_finish, 61 .config_json = vbdev_delay_config_json 62 }; 63 64 SPDK_BDEV_MODULE_REGISTER(delay, &delay_if) 65 66 /* Associative list to be used in examine */ 67 struct bdev_association { 68 char *vbdev_name; 69 char *bdev_name; 70 uint64_t avg_read_latency; 71 uint64_t p99_read_latency; 72 uint64_t avg_write_latency; 73 uint64_t p99_write_latency; 74 TAILQ_ENTRY(bdev_association) link; 75 }; 76 static TAILQ_HEAD(, bdev_association) g_bdev_associations = TAILQ_HEAD_INITIALIZER( 77 g_bdev_associations); 78 79 /* List of virtual bdevs and associated info for each. */ 80 struct vbdev_delay { 81 struct spdk_bdev *base_bdev; /* the thing we're attaching to */ 82 struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ 83 struct spdk_bdev delay_bdev; /* the delay virtual bdev */ 84 uint64_t average_read_latency_ticks; /* the average read delay */ 85 uint64_t p99_read_latency_ticks; /* the p99 read delay */ 86 uint64_t average_write_latency_ticks; /* the average write delay */ 87 uint64_t p99_write_latency_ticks; /* the p99 write delay */ 88 TAILQ_ENTRY(vbdev_delay) link; 89 struct spdk_thread *thread; /* thread where base device is opened */ 90 }; 91 static TAILQ_HEAD(, vbdev_delay) g_delay_nodes = TAILQ_HEAD_INITIALIZER(g_delay_nodes); 92 93 struct delay_bdev_io { 94 int status; 95 96 uint64_t completion_tick; 97 98 enum delay_io_type type; 99 100 struct spdk_io_channel *ch; 101 102 struct spdk_bdev_io_wait_entry bdev_io_wait; 103 104 STAILQ_ENTRY(delay_bdev_io) link; 105 }; 106 107 struct delay_io_channel { 108 struct spdk_io_channel *base_ch; /* IO channel of base device */ 109 STAILQ_HEAD(, delay_bdev_io) avg_read_io; 110 STAILQ_HEAD(, delay_bdev_io) p99_read_io; 111 STAILQ_HEAD(, delay_bdev_io) avg_write_io; 112 STAILQ_HEAD(, delay_bdev_io) p99_write_io; 113 struct spdk_poller *io_poller; 114 unsigned int rand_seed; 115 }; 116 117 static void 118 vbdev_delay_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); 119 120 121 /* Callback for unregistering the IO device. */ 122 static void 123 _device_unregister_cb(void *io_device) 124 { 125 struct vbdev_delay *delay_node = io_device; 126 127 /* Done with this delay_node. */ 128 free(delay_node->delay_bdev.name); 129 free(delay_node); 130 } 131 132 static void 133 _vbdev_delay_destruct(void *ctx) 134 { 135 struct spdk_bdev_desc *desc = ctx; 136 137 spdk_bdev_close(desc); 138 } 139 140 static int 141 vbdev_delay_destruct(void *ctx) 142 { 143 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 144 145 /* It is important to follow this exact sequence of steps for destroying 146 * a vbdev... 147 */ 148 149 TAILQ_REMOVE(&g_delay_nodes, delay_node, link); 150 151 /* Unclaim the underlying bdev. */ 152 spdk_bdev_module_release_bdev(delay_node->base_bdev); 153 154 /* Close the underlying bdev on its same opened thread. */ 155 if (delay_node->thread && delay_node->thread != spdk_get_thread()) { 156 spdk_thread_send_msg(delay_node->thread, _vbdev_delay_destruct, delay_node->base_desc); 157 } else { 158 spdk_bdev_close(delay_node->base_desc); 159 } 160 161 /* Unregister the io_device. */ 162 spdk_io_device_unregister(delay_node, _device_unregister_cb); 163 164 return 0; 165 } 166 167 static int 168 _process_io_stailq(void *arg, uint64_t ticks) 169 { 170 STAILQ_HEAD(, delay_bdev_io) *head = arg; 171 struct delay_bdev_io *io_ctx, *tmp; 172 int completions = 0; 173 174 STAILQ_FOREACH_SAFE(io_ctx, head, link, tmp) { 175 if (io_ctx->completion_tick <= ticks) { 176 STAILQ_REMOVE(head, io_ctx, delay_bdev_io, link); 177 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(io_ctx), io_ctx->status); 178 completions++; 179 } else { 180 /* In the general case, I/O will become ready in an fifo order. When timeouts are dynamically 181 * changed, this is not necessarily the case. However, the normal behavior will be restored 182 * after the outstanding I/O at the time of the change have been completed. 183 * This essentially means that moving from a high to low latency creates a dam for the new I/O 184 * submitted after the latency change. This is considered desirable behavior for the use case where 185 * we are trying to trigger a pre-defined timeout on an initiator. 186 */ 187 break; 188 } 189 } 190 191 return completions; 192 } 193 194 static int 195 _delay_finish_io(void *arg) 196 { 197 struct delay_io_channel *delay_ch = arg; 198 uint64_t ticks = spdk_get_ticks(); 199 int completions = 0; 200 201 completions += _process_io_stailq(&delay_ch->avg_read_io, ticks); 202 completions += _process_io_stailq(&delay_ch->avg_write_io, ticks); 203 completions += _process_io_stailq(&delay_ch->p99_read_io, ticks); 204 completions += _process_io_stailq(&delay_ch->p99_write_io, ticks); 205 206 return completions == 0 ? SPDK_POLLER_IDLE : SPDK_POLLER_BUSY; 207 } 208 209 /* Completion callback for IO that were issued from this bdev. The original bdev_io 210 * is passed in as an arg so we'll complete that one with the appropriate status 211 * and then free the one that this module issued. 212 */ 213 static void 214 _delay_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 215 { 216 struct spdk_bdev_io *orig_io = cb_arg; 217 struct vbdev_delay *delay_node = SPDK_CONTAINEROF(orig_io->bdev, struct vbdev_delay, delay_bdev); 218 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)orig_io->driver_ctx; 219 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(io_ctx->ch); 220 221 io_ctx->status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 222 spdk_bdev_free_io(bdev_io); 223 224 /* Put the I/O into the proper list for processing by the channel poller. */ 225 switch (io_ctx->type) { 226 case DELAY_AVG_READ: 227 io_ctx->completion_tick = spdk_get_ticks() + delay_node->average_read_latency_ticks; 228 STAILQ_INSERT_TAIL(&delay_ch->avg_read_io, io_ctx, link); 229 break; 230 case DELAY_AVG_WRITE: 231 io_ctx->completion_tick = spdk_get_ticks() + delay_node->average_write_latency_ticks; 232 STAILQ_INSERT_TAIL(&delay_ch->avg_write_io, io_ctx, link); 233 break; 234 case DELAY_P99_READ: 235 io_ctx->completion_tick = spdk_get_ticks() + delay_node->p99_read_latency_ticks; 236 STAILQ_INSERT_TAIL(&delay_ch->p99_read_io, io_ctx, link); 237 break; 238 case DELAY_P99_WRITE: 239 io_ctx->completion_tick = spdk_get_ticks() + delay_node->p99_write_latency_ticks; 240 STAILQ_INSERT_TAIL(&delay_ch->p99_write_io, io_ctx, link); 241 break; 242 case DELAY_NONE: 243 default: 244 spdk_bdev_io_complete(orig_io, io_ctx->status); 245 break; 246 } 247 } 248 249 static void 250 vbdev_delay_resubmit_io(void *arg) 251 { 252 struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg; 253 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)bdev_io->driver_ctx; 254 255 vbdev_delay_submit_request(io_ctx->ch, bdev_io); 256 } 257 258 static void 259 vbdev_delay_queue_io(struct spdk_bdev_io *bdev_io) 260 { 261 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)bdev_io->driver_ctx; 262 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(io_ctx->ch); 263 int rc; 264 265 io_ctx->bdev_io_wait.bdev = bdev_io->bdev; 266 io_ctx->bdev_io_wait.cb_fn = vbdev_delay_resubmit_io; 267 io_ctx->bdev_io_wait.cb_arg = bdev_io; 268 269 rc = spdk_bdev_queue_io_wait(bdev_io->bdev, delay_ch->base_ch, &io_ctx->bdev_io_wait); 270 if (rc != 0) { 271 SPDK_ERRLOG("Queue io failed in vbdev_delay_queue_io, rc=%d.\n", rc); 272 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 273 } 274 } 275 276 static void 277 delay_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success) 278 { 279 struct vbdev_delay *delay_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_delay, 280 delay_bdev); 281 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 282 int rc; 283 284 if (!success) { 285 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 286 return; 287 } 288 289 rc = spdk_bdev_readv_blocks(delay_node->base_desc, delay_ch->base_ch, bdev_io->u.bdev.iovs, 290 bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, 291 bdev_io->u.bdev.num_blocks, _delay_complete_io, 292 bdev_io); 293 294 if (rc == -ENOMEM) { 295 SPDK_ERRLOG("No memory, start to queue io for delay.\n"); 296 vbdev_delay_queue_io(bdev_io); 297 } else if (rc != 0) { 298 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 299 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 300 } 301 } 302 303 static void 304 vbdev_delay_reset_dev(struct spdk_io_channel_iter *i, int status) 305 { 306 struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i); 307 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 308 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 309 struct vbdev_delay *delay_node = spdk_io_channel_iter_get_io_device(i); 310 int rc; 311 312 rc = spdk_bdev_reset(delay_node->base_desc, delay_ch->base_ch, 313 _delay_complete_io, bdev_io); 314 315 if (rc == -ENOMEM) { 316 SPDK_ERRLOG("No memory, start to queue io for delay.\n"); 317 vbdev_delay_queue_io(bdev_io); 318 } else if (rc != 0) { 319 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 320 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 321 } 322 } 323 324 static void 325 _abort_all_delayed_io(void *arg) 326 { 327 STAILQ_HEAD(, delay_bdev_io) *head = arg; 328 struct delay_bdev_io *io_ctx, *tmp; 329 330 STAILQ_FOREACH_SAFE(io_ctx, head, link, tmp) { 331 STAILQ_REMOVE(head, io_ctx, delay_bdev_io, link); 332 spdk_bdev_io_complete(spdk_bdev_io_from_ctx(io_ctx), SPDK_BDEV_IO_STATUS_ABORTED); 333 } 334 } 335 336 static void 337 vbdev_delay_reset_channel(struct spdk_io_channel_iter *i) 338 { 339 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 340 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 341 342 _abort_all_delayed_io(&delay_ch->avg_read_io); 343 _abort_all_delayed_io(&delay_ch->avg_write_io); 344 _abort_all_delayed_io(&delay_ch->p99_read_io); 345 _abort_all_delayed_io(&delay_ch->p99_write_io); 346 347 spdk_for_each_channel_continue(i, 0); 348 } 349 350 static bool 351 abort_delayed_io(void *_head, struct spdk_bdev_io *bio_to_abort) 352 { 353 STAILQ_HEAD(, delay_bdev_io) *head = _head; 354 struct delay_bdev_io *io_ctx_to_abort = (struct delay_bdev_io *)bio_to_abort->driver_ctx; 355 struct delay_bdev_io *io_ctx; 356 357 STAILQ_FOREACH(io_ctx, head, link) { 358 if (io_ctx == io_ctx_to_abort) { 359 STAILQ_REMOVE(head, io_ctx_to_abort, delay_bdev_io, link); 360 spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED); 361 return true; 362 } 363 } 364 365 return false; 366 } 367 368 static int 369 vbdev_delay_abort(struct vbdev_delay *delay_node, struct delay_io_channel *delay_ch, 370 struct spdk_bdev_io *bdev_io) 371 { 372 struct spdk_bdev_io *bio_to_abort = bdev_io->u.abort.bio_to_abort; 373 374 if (abort_delayed_io(&delay_ch->avg_read_io, bio_to_abort) || 375 abort_delayed_io(&delay_ch->avg_write_io, bio_to_abort) || 376 abort_delayed_io(&delay_ch->p99_read_io, bio_to_abort) || 377 abort_delayed_io(&delay_ch->p99_write_io, bio_to_abort)) { 378 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS); 379 return 0; 380 } 381 382 return spdk_bdev_abort(delay_node->base_desc, delay_ch->base_ch, bio_to_abort, 383 _delay_complete_io, bdev_io); 384 } 385 386 static void 387 vbdev_delay_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 388 { 389 struct vbdev_delay *delay_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_delay, delay_bdev); 390 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 391 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)bdev_io->driver_ctx; 392 int rc = 0; 393 bool is_p99; 394 395 is_p99 = rand_r(&delay_ch->rand_seed) % 100 == 0 ? true : false; 396 397 io_ctx->ch = ch; 398 io_ctx->type = DELAY_NONE; 399 400 switch (bdev_io->type) { 401 case SPDK_BDEV_IO_TYPE_READ: 402 io_ctx->type = is_p99 ? DELAY_P99_READ : DELAY_AVG_READ; 403 spdk_bdev_io_get_buf(bdev_io, delay_read_get_buf_cb, 404 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 405 break; 406 case SPDK_BDEV_IO_TYPE_WRITE: 407 io_ctx->type = is_p99 ? DELAY_P99_WRITE : DELAY_AVG_WRITE; 408 rc = spdk_bdev_writev_blocks(delay_node->base_desc, delay_ch->base_ch, bdev_io->u.bdev.iovs, 409 bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, 410 bdev_io->u.bdev.num_blocks, _delay_complete_io, 411 bdev_io); 412 break; 413 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 414 rc = spdk_bdev_write_zeroes_blocks(delay_node->base_desc, delay_ch->base_ch, 415 bdev_io->u.bdev.offset_blocks, 416 bdev_io->u.bdev.num_blocks, 417 _delay_complete_io, bdev_io); 418 break; 419 case SPDK_BDEV_IO_TYPE_UNMAP: 420 rc = spdk_bdev_unmap_blocks(delay_node->base_desc, delay_ch->base_ch, 421 bdev_io->u.bdev.offset_blocks, 422 bdev_io->u.bdev.num_blocks, 423 _delay_complete_io, bdev_io); 424 break; 425 case SPDK_BDEV_IO_TYPE_FLUSH: 426 rc = spdk_bdev_flush_blocks(delay_node->base_desc, delay_ch->base_ch, 427 bdev_io->u.bdev.offset_blocks, 428 bdev_io->u.bdev.num_blocks, 429 _delay_complete_io, bdev_io); 430 break; 431 case SPDK_BDEV_IO_TYPE_RESET: 432 /* During reset, the generic bdev layer aborts all new I/Os and queues all new resets. 433 * Hence we can simply abort all I/Os delayed to complete. 434 */ 435 spdk_for_each_channel(delay_node, vbdev_delay_reset_channel, bdev_io, 436 vbdev_delay_reset_dev); 437 break; 438 case SPDK_BDEV_IO_TYPE_ABORT: 439 rc = vbdev_delay_abort(delay_node, delay_ch, bdev_io); 440 break; 441 default: 442 SPDK_ERRLOG("delay: unknown I/O type %d\n", bdev_io->type); 443 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 444 return; 445 } 446 447 if (rc == -ENOMEM) { 448 SPDK_ERRLOG("No memory, start to queue io for delay.\n"); 449 vbdev_delay_queue_io(bdev_io); 450 } else if (rc != 0) { 451 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 452 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 453 } 454 } 455 456 static bool 457 vbdev_delay_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 458 { 459 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 460 461 if (io_type == SPDK_BDEV_IO_TYPE_ZCOPY) { 462 return false; 463 } else { 464 return spdk_bdev_io_type_supported(delay_node->base_bdev, io_type); 465 } 466 } 467 468 static struct spdk_io_channel * 469 vbdev_delay_get_io_channel(void *ctx) 470 { 471 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 472 struct spdk_io_channel *delay_ch = NULL; 473 474 delay_ch = spdk_get_io_channel(delay_node); 475 476 return delay_ch; 477 } 478 479 static void 480 _delay_write_conf_values(struct vbdev_delay *delay_node, struct spdk_json_write_ctx *w) 481 { 482 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&delay_node->delay_bdev)); 483 spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(delay_node->base_bdev)); 484 spdk_json_write_named_int64(w, "avg_read_latency", 485 delay_node->average_read_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 486 spdk_json_write_named_int64(w, "p99_read_latency", 487 delay_node->p99_read_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 488 spdk_json_write_named_int64(w, "avg_write_latency", 489 delay_node->average_write_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 490 spdk_json_write_named_int64(w, "p99_write_latency", 491 delay_node->p99_write_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 492 } 493 494 static int 495 vbdev_delay_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 496 { 497 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 498 499 spdk_json_write_name(w, "delay"); 500 spdk_json_write_object_begin(w); 501 _delay_write_conf_values(delay_node, w); 502 spdk_json_write_object_end(w); 503 504 return 0; 505 } 506 507 /* This is used to generate JSON that can configure this module to its current state. */ 508 static int 509 vbdev_delay_config_json(struct spdk_json_write_ctx *w) 510 { 511 struct vbdev_delay *delay_node; 512 513 TAILQ_FOREACH(delay_node, &g_delay_nodes, link) { 514 spdk_json_write_object_begin(w); 515 spdk_json_write_named_string(w, "method", "bdev_delay_create"); 516 spdk_json_write_named_object_begin(w, "params"); 517 _delay_write_conf_values(delay_node, w); 518 spdk_json_write_object_end(w); 519 } 520 return 0; 521 } 522 523 /* We provide this callback for the SPDK channel code to create a channel using 524 * the channel struct we provided in our module get_io_channel() entry point. Here 525 * we get and save off an underlying base channel of the device below us so that 526 * we can communicate with the base bdev on a per channel basis. If we needed 527 * our own poller for this vbdev, we'd register it here. 528 */ 529 static int 530 delay_bdev_ch_create_cb(void *io_device, void *ctx_buf) 531 { 532 struct delay_io_channel *delay_ch = ctx_buf; 533 struct vbdev_delay *delay_node = io_device; 534 535 STAILQ_INIT(&delay_ch->avg_read_io); 536 STAILQ_INIT(&delay_ch->p99_read_io); 537 STAILQ_INIT(&delay_ch->avg_write_io); 538 STAILQ_INIT(&delay_ch->p99_write_io); 539 540 delay_ch->io_poller = SPDK_POLLER_REGISTER(_delay_finish_io, delay_ch, 0); 541 delay_ch->base_ch = spdk_bdev_get_io_channel(delay_node->base_desc); 542 delay_ch->rand_seed = time(NULL); 543 544 return 0; 545 } 546 547 /* We provide this callback for the SPDK channel code to destroy a channel 548 * created with our create callback. We just need to undo anything we did 549 * when we created. If this bdev used its own poller, we'd unregsiter it here. 550 */ 551 static void 552 delay_bdev_ch_destroy_cb(void *io_device, void *ctx_buf) 553 { 554 struct delay_io_channel *delay_ch = ctx_buf; 555 556 spdk_poller_unregister(&delay_ch->io_poller); 557 spdk_put_io_channel(delay_ch->base_ch); 558 } 559 560 /* Create the delay association from the bdev and vbdev name and insert 561 * on the global list. */ 562 static int 563 vbdev_delay_insert_association(const char *bdev_name, const char *vbdev_name, 564 uint64_t avg_read_latency, uint64_t p99_read_latency, 565 uint64_t avg_write_latency, uint64_t p99_write_latency) 566 { 567 struct bdev_association *assoc; 568 569 TAILQ_FOREACH(assoc, &g_bdev_associations, link) { 570 if (strcmp(vbdev_name, assoc->vbdev_name) == 0) { 571 SPDK_ERRLOG("delay bdev %s already exists\n", vbdev_name); 572 return -EEXIST; 573 } 574 } 575 576 assoc = calloc(1, sizeof(struct bdev_association)); 577 if (!assoc) { 578 SPDK_ERRLOG("could not allocate bdev_association\n"); 579 return -ENOMEM; 580 } 581 582 assoc->bdev_name = strdup(bdev_name); 583 if (!assoc->bdev_name) { 584 SPDK_ERRLOG("could not allocate assoc->bdev_name\n"); 585 free(assoc); 586 return -ENOMEM; 587 } 588 589 assoc->vbdev_name = strdup(vbdev_name); 590 if (!assoc->vbdev_name) { 591 SPDK_ERRLOG("could not allocate assoc->vbdev_name\n"); 592 free(assoc->bdev_name); 593 free(assoc); 594 return -ENOMEM; 595 } 596 597 assoc->avg_read_latency = avg_read_latency; 598 assoc->p99_read_latency = p99_read_latency; 599 assoc->avg_write_latency = avg_write_latency; 600 assoc->p99_write_latency = p99_write_latency; 601 602 TAILQ_INSERT_TAIL(&g_bdev_associations, assoc, link); 603 604 return 0; 605 } 606 607 int 608 vbdev_delay_update_latency_value(char *delay_name, uint64_t latency_us, enum delay_io_type type) 609 { 610 struct spdk_bdev *delay_bdev; 611 struct vbdev_delay *delay_node; 612 uint64_t ticks_mhz = spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; 613 614 delay_bdev = spdk_bdev_get_by_name(delay_name); 615 if (delay_bdev == NULL) { 616 return -ENODEV; 617 } else if (delay_bdev->module != &delay_if) { 618 return -EINVAL; 619 } 620 621 delay_node = SPDK_CONTAINEROF(delay_bdev, struct vbdev_delay, delay_bdev); 622 623 switch (type) { 624 case DELAY_AVG_READ: 625 delay_node->average_read_latency_ticks = ticks_mhz * latency_us; 626 break; 627 case DELAY_AVG_WRITE: 628 delay_node->average_write_latency_ticks = ticks_mhz * latency_us; 629 break; 630 case DELAY_P99_READ: 631 delay_node->p99_read_latency_ticks = ticks_mhz * latency_us; 632 break; 633 case DELAY_P99_WRITE: 634 delay_node->p99_write_latency_ticks = ticks_mhz * latency_us; 635 break; 636 default: 637 return -EINVAL; 638 } 639 640 return 0; 641 } 642 643 static int 644 vbdev_delay_init(void) 645 { 646 /* Not allowing for .ini style configuration. */ 647 return 0; 648 } 649 650 static void 651 vbdev_delay_finish(void) 652 { 653 struct bdev_association *assoc; 654 655 while ((assoc = TAILQ_FIRST(&g_bdev_associations))) { 656 TAILQ_REMOVE(&g_bdev_associations, assoc, link); 657 free(assoc->bdev_name); 658 free(assoc->vbdev_name); 659 free(assoc); 660 } 661 } 662 663 static int 664 vbdev_delay_get_ctx_size(void) 665 { 666 return sizeof(struct delay_bdev_io); 667 } 668 669 static void 670 vbdev_delay_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 671 { 672 /* No config per bdev needed */ 673 } 674 675 /* When we register our bdev this is how we specify our entry points. */ 676 static const struct spdk_bdev_fn_table vbdev_delay_fn_table = { 677 .destruct = vbdev_delay_destruct, 678 .submit_request = vbdev_delay_submit_request, 679 .io_type_supported = vbdev_delay_io_type_supported, 680 .get_io_channel = vbdev_delay_get_io_channel, 681 .dump_info_json = vbdev_delay_dump_info_json, 682 .write_config_json = vbdev_delay_write_config_json, 683 }; 684 685 static void 686 vbdev_delay_base_bdev_hotremove_cb(struct spdk_bdev *bdev_find) 687 { 688 struct vbdev_delay *delay_node, *tmp; 689 690 TAILQ_FOREACH_SAFE(delay_node, &g_delay_nodes, link, tmp) { 691 if (bdev_find == delay_node->base_bdev) { 692 spdk_bdev_unregister(&delay_node->delay_bdev, NULL, NULL); 693 } 694 } 695 } 696 697 /* Called when the underlying base bdev triggers asynchronous event such as bdev removal. */ 698 static void 699 vbdev_delay_base_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, 700 void *event_ctx) 701 { 702 switch (type) { 703 case SPDK_BDEV_EVENT_REMOVE: 704 vbdev_delay_base_bdev_hotremove_cb(bdev); 705 break; 706 default: 707 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type); 708 break; 709 } 710 } 711 712 /* Create and register the delay vbdev if we find it in our list of bdev names. 713 * This can be called either by the examine path or RPC method. 714 */ 715 static int 716 vbdev_delay_register(const char *bdev_name) 717 { 718 struct bdev_association *assoc; 719 struct vbdev_delay *delay_node; 720 struct spdk_bdev *bdev; 721 uint64_t ticks_mhz = spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; 722 int rc = 0; 723 724 /* Check our list of names from config versus this bdev and if 725 * there's a match, create the delay_node & bdev accordingly. 726 */ 727 TAILQ_FOREACH(assoc, &g_bdev_associations, link) { 728 if (strcmp(assoc->bdev_name, bdev_name) != 0) { 729 continue; 730 } 731 732 delay_node = calloc(1, sizeof(struct vbdev_delay)); 733 if (!delay_node) { 734 rc = -ENOMEM; 735 SPDK_ERRLOG("could not allocate delay_node\n"); 736 break; 737 } 738 delay_node->delay_bdev.name = strdup(assoc->vbdev_name); 739 if (!delay_node->delay_bdev.name) { 740 rc = -ENOMEM; 741 SPDK_ERRLOG("could not allocate delay_bdev name\n"); 742 free(delay_node); 743 break; 744 } 745 delay_node->delay_bdev.product_name = "delay"; 746 747 /* The base bdev that we're attaching to. */ 748 rc = spdk_bdev_open_ext(bdev_name, true, vbdev_delay_base_bdev_event_cb, 749 NULL, &delay_node->base_desc); 750 if (rc) { 751 if (rc != -ENODEV) { 752 SPDK_ERRLOG("could not open bdev %s\n", bdev_name); 753 } 754 free(delay_node->delay_bdev.name); 755 free(delay_node); 756 break; 757 } 758 759 bdev = spdk_bdev_desc_get_bdev(delay_node->base_desc); 760 delay_node->base_bdev = bdev; 761 762 delay_node->delay_bdev.write_cache = bdev->write_cache; 763 delay_node->delay_bdev.required_alignment = bdev->required_alignment; 764 delay_node->delay_bdev.optimal_io_boundary = bdev->optimal_io_boundary; 765 delay_node->delay_bdev.blocklen = bdev->blocklen; 766 delay_node->delay_bdev.blockcnt = bdev->blockcnt; 767 768 delay_node->delay_bdev.ctxt = delay_node; 769 delay_node->delay_bdev.fn_table = &vbdev_delay_fn_table; 770 delay_node->delay_bdev.module = &delay_if; 771 772 /* Store the number of ticks you need to add to get the I/O expiration time. */ 773 delay_node->average_read_latency_ticks = ticks_mhz * assoc->avg_read_latency; 774 delay_node->p99_read_latency_ticks = ticks_mhz * assoc->p99_read_latency; 775 delay_node->average_write_latency_ticks = ticks_mhz * assoc->avg_write_latency; 776 delay_node->p99_write_latency_ticks = ticks_mhz * assoc->p99_write_latency; 777 778 spdk_io_device_register(delay_node, delay_bdev_ch_create_cb, delay_bdev_ch_destroy_cb, 779 sizeof(struct delay_io_channel), 780 assoc->vbdev_name); 781 782 /* Save the thread where the base device is opened */ 783 delay_node->thread = spdk_get_thread(); 784 785 rc = spdk_bdev_module_claim_bdev(bdev, delay_node->base_desc, delay_node->delay_bdev.module); 786 if (rc) { 787 SPDK_ERRLOG("could not claim bdev %s\n", bdev_name); 788 goto error_close; 789 } 790 791 rc = spdk_bdev_register(&delay_node->delay_bdev); 792 if (rc) { 793 SPDK_ERRLOG("could not register delay_bdev\n"); 794 spdk_bdev_module_release_bdev(delay_node->base_bdev); 795 goto error_close; 796 } 797 798 TAILQ_INSERT_TAIL(&g_delay_nodes, delay_node, link); 799 } 800 801 return rc; 802 803 error_close: 804 spdk_bdev_close(delay_node->base_desc); 805 spdk_io_device_unregister(delay_node, NULL); 806 free(delay_node->delay_bdev.name); 807 free(delay_node); 808 return rc; 809 } 810 811 int 812 create_delay_disk(const char *bdev_name, const char *vbdev_name, uint64_t avg_read_latency, 813 uint64_t p99_read_latency, uint64_t avg_write_latency, uint64_t p99_write_latency) 814 { 815 int rc = 0; 816 817 if (p99_read_latency < avg_read_latency || p99_write_latency < avg_write_latency) { 818 SPDK_ERRLOG("Unable to create a delay bdev where p99 latency is less than average latency.\n"); 819 return -EINVAL; 820 } 821 822 rc = vbdev_delay_insert_association(bdev_name, vbdev_name, avg_read_latency, p99_read_latency, 823 avg_write_latency, p99_write_latency); 824 if (rc) { 825 return rc; 826 } 827 828 rc = vbdev_delay_register(bdev_name); 829 if (rc == -ENODEV) { 830 /* This is not an error, we tracked the name above and it still 831 * may show up later. 832 */ 833 SPDK_NOTICELOG("vbdev creation deferred pending base bdev arrival\n"); 834 rc = 0; 835 } 836 837 return rc; 838 } 839 840 void 841 delete_delay_disk(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg) 842 { 843 struct bdev_association *assoc; 844 845 if (!bdev || bdev->module != &delay_if) { 846 cb_fn(cb_arg, -ENODEV); 847 return; 848 } 849 850 TAILQ_FOREACH(assoc, &g_bdev_associations, link) { 851 if (strcmp(assoc->vbdev_name, bdev->name) == 0) { 852 TAILQ_REMOVE(&g_bdev_associations, assoc, link); 853 free(assoc->bdev_name); 854 free(assoc->vbdev_name); 855 free(assoc); 856 break; 857 } 858 } 859 860 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 861 } 862 863 static void 864 vbdev_delay_examine(struct spdk_bdev *bdev) 865 { 866 vbdev_delay_register(bdev->name); 867 868 spdk_bdev_module_examine_done(&delay_if); 869 } 870 871 SPDK_LOG_REGISTER_COMPONENT(vbdev_delay) 872