1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include "spdk/stdinc.h" 35 36 #include "vbdev_delay.h" 37 #include "spdk/rpc.h" 38 #include "spdk/env.h" 39 #include "spdk/conf.h" 40 #include "spdk/endian.h" 41 #include "spdk/string.h" 42 #include "spdk/thread.h" 43 #include "spdk/util.h" 44 45 #include "spdk/bdev_module.h" 46 #include "spdk_internal/log.h" 47 48 49 static int vbdev_delay_init(void); 50 static int vbdev_delay_get_ctx_size(void); 51 static void vbdev_delay_examine(struct spdk_bdev *bdev); 52 static void vbdev_delay_finish(void); 53 static int vbdev_delay_config_json(struct spdk_json_write_ctx *w); 54 55 static struct spdk_bdev_module delay_if = { 56 .name = "delay", 57 .module_init = vbdev_delay_init, 58 .config_text = NULL, 59 .get_ctx_size = vbdev_delay_get_ctx_size, 60 .examine_config = vbdev_delay_examine, 61 .module_fini = vbdev_delay_finish, 62 .config_json = vbdev_delay_config_json 63 }; 64 65 SPDK_BDEV_MODULE_REGISTER(delay, &delay_if) 66 67 /* Associative list to be used in examine */ 68 struct bdev_association { 69 char *vbdev_name; 70 char *bdev_name; 71 uint64_t avg_read_latency; 72 uint64_t p99_read_latency; 73 uint64_t avg_write_latency; 74 uint64_t p99_write_latency; 75 TAILQ_ENTRY(bdev_association) link; 76 }; 77 static TAILQ_HEAD(, bdev_association) g_bdev_associations = TAILQ_HEAD_INITIALIZER( 78 g_bdev_associations); 79 80 /* List of virtual bdevs and associated info for each. */ 81 struct vbdev_delay { 82 struct spdk_bdev *base_bdev; /* the thing we're attaching to */ 83 struct spdk_bdev_desc *base_desc; /* its descriptor we get from open */ 84 struct spdk_bdev delay_bdev; /* the delay virtual bdev */ 85 uint64_t average_read_latency_ticks; /* the average read delay */ 86 uint64_t p99_read_latency_ticks; /* the p99 read delay */ 87 uint64_t average_write_latency_ticks; /* the average write delay */ 88 uint64_t p99_write_latency_ticks; /* the p99 write delay */ 89 TAILQ_ENTRY(vbdev_delay) link; 90 }; 91 static TAILQ_HEAD(, vbdev_delay) g_delay_nodes = TAILQ_HEAD_INITIALIZER(g_delay_nodes); 92 93 struct delay_bdev_io { 94 int status; 95 96 uint64_t completion_tick; 97 98 enum delay_io_type type; 99 100 struct spdk_io_channel *ch; 101 102 struct spdk_bdev_io_wait_entry bdev_io_wait; 103 104 STAILQ_ENTRY(delay_bdev_io) link; 105 }; 106 107 struct delay_io_channel { 108 struct spdk_io_channel *base_ch; /* IO channel of base device */ 109 STAILQ_HEAD(, delay_bdev_io) avg_read_io; 110 STAILQ_HEAD(, delay_bdev_io) p99_read_io; 111 STAILQ_HEAD(, delay_bdev_io) avg_write_io; 112 STAILQ_HEAD(, delay_bdev_io) p99_write_io; 113 struct spdk_poller *io_poller; 114 unsigned int rand_seed; 115 }; 116 117 static void 118 vbdev_delay_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io); 119 120 121 /* Callback for unregistering the IO device. */ 122 static void 123 _device_unregister_cb(void *io_device) 124 { 125 struct vbdev_delay *delay_node = io_device; 126 127 /* Done with this delay_node. */ 128 free(delay_node->delay_bdev.name); 129 free(delay_node); 130 } 131 132 static int 133 vbdev_delay_destruct(void *ctx) 134 { 135 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 136 137 /* It is important to follow this exact sequence of steps for destroying 138 * a vbdev... 139 */ 140 141 TAILQ_REMOVE(&g_delay_nodes, delay_node, link); 142 143 /* Unclaim the underlying bdev. */ 144 spdk_bdev_module_release_bdev(delay_node->base_bdev); 145 146 /* Close the underlying bdev. */ 147 spdk_bdev_close(delay_node->base_desc); 148 149 /* Unregister the io_device. */ 150 spdk_io_device_unregister(delay_node, _device_unregister_cb); 151 152 return 0; 153 } 154 155 static void 156 _process_io_stailq(void *arg, uint64_t ticks) 157 { 158 STAILQ_HEAD(, delay_bdev_io) *head = arg; 159 struct delay_bdev_io *io_ctx, *tmp; 160 161 STAILQ_FOREACH_SAFE(io_ctx, head, link, tmp) { 162 if (io_ctx->completion_tick <= ticks) { 163 STAILQ_REMOVE(head, io_ctx, delay_bdev_io, link); 164 spdk_bdev_io_complete(SPDK_CONTAINEROF(io_ctx, struct spdk_bdev_io, driver_ctx), io_ctx->status); 165 } else { 166 /* In the general case, I/O will become ready in an fifo order. When timeouts are dynamically 167 * changed, this is not necessarily the case. However, the normal behavior will be restored 168 * after the outstanding I/O at the time of the change have been completed. 169 * This essentially means that moving from a high to low latency creates a dam for the new I/O 170 * submitted after the latency change. This is considered desirable behavior for the use case where 171 * we are trying to trigger a pre-defined timeout on an initiator. 172 */ 173 break; 174 } 175 } 176 } 177 178 static int 179 _delay_finish_io(void *arg) 180 { 181 struct delay_io_channel *delay_ch = arg; 182 uint64_t ticks = spdk_get_ticks(); 183 184 _process_io_stailq(&delay_ch->avg_read_io, ticks); 185 _process_io_stailq(&delay_ch->avg_write_io, ticks); 186 _process_io_stailq(&delay_ch->p99_read_io, ticks); 187 _process_io_stailq(&delay_ch->p99_write_io, ticks); 188 189 return 0; 190 } 191 192 /* Completion callback for IO that were issued from this bdev. The original bdev_io 193 * is passed in as an arg so we'll complete that one with the appropriate status 194 * and then free the one that this module issued. 195 */ 196 static void 197 _delay_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 198 { 199 struct spdk_bdev_io *orig_io = cb_arg; 200 struct vbdev_delay *delay_node = SPDK_CONTAINEROF(orig_io->bdev, struct vbdev_delay, delay_bdev); 201 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)orig_io->driver_ctx; 202 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(io_ctx->ch); 203 204 io_ctx->status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 205 spdk_bdev_free_io(bdev_io); 206 207 /* Put the I/O into the proper list for processing by the channel poller. */ 208 switch (io_ctx->type) { 209 case DELAY_AVG_READ: 210 io_ctx->completion_tick = spdk_get_ticks() + delay_node->average_read_latency_ticks; 211 STAILQ_INSERT_TAIL(&delay_ch->avg_read_io, io_ctx, link); 212 break; 213 case DELAY_AVG_WRITE: 214 io_ctx->completion_tick = spdk_get_ticks() + delay_node->average_write_latency_ticks; 215 STAILQ_INSERT_TAIL(&delay_ch->avg_write_io, io_ctx, link); 216 break; 217 case DELAY_P99_READ: 218 io_ctx->completion_tick = spdk_get_ticks() + delay_node->p99_read_latency_ticks; 219 STAILQ_INSERT_TAIL(&delay_ch->p99_read_io, io_ctx, link); 220 break; 221 case DELAY_P99_WRITE: 222 io_ctx->completion_tick = spdk_get_ticks() + delay_node->p99_write_latency_ticks; 223 STAILQ_INSERT_TAIL(&delay_ch->p99_write_io, io_ctx, link); 224 break; 225 case DELAY_NONE: 226 default: 227 spdk_bdev_io_complete(orig_io, io_ctx->status); 228 break; 229 } 230 } 231 232 static void 233 vbdev_delay_resubmit_io(void *arg) 234 { 235 struct spdk_bdev_io *bdev_io = (struct spdk_bdev_io *)arg; 236 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)bdev_io->driver_ctx; 237 238 vbdev_delay_submit_request(io_ctx->ch, bdev_io); 239 } 240 241 static void 242 vbdev_delay_queue_io(struct spdk_bdev_io *bdev_io) 243 { 244 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)bdev_io->driver_ctx; 245 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(io_ctx->ch); 246 int rc; 247 248 io_ctx->bdev_io_wait.bdev = bdev_io->bdev; 249 io_ctx->bdev_io_wait.cb_fn = vbdev_delay_resubmit_io; 250 io_ctx->bdev_io_wait.cb_arg = bdev_io; 251 252 rc = spdk_bdev_queue_io_wait(bdev_io->bdev, delay_ch->base_ch, &io_ctx->bdev_io_wait); 253 if (rc != 0) { 254 SPDK_ERRLOG("Queue io failed in vbdev_delay_queue_io, rc=%d.\n", rc); 255 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 256 } 257 } 258 259 static void 260 delay_read_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success) 261 { 262 struct vbdev_delay *delay_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_delay, 263 delay_bdev); 264 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 265 int rc; 266 267 if (!success) { 268 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 269 return; 270 } 271 272 rc = spdk_bdev_readv_blocks(delay_node->base_desc, delay_ch->base_ch, bdev_io->u.bdev.iovs, 273 bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, 274 bdev_io->u.bdev.num_blocks, _delay_complete_io, 275 bdev_io); 276 277 if (rc != 0) { 278 if (rc == -ENOMEM) { 279 SPDK_ERRLOG("No memory, start to queue io for delay.\n"); 280 vbdev_delay_queue_io(bdev_io); 281 } else { 282 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 283 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 284 } 285 } 286 } 287 288 static void 289 vbdev_delay_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io) 290 { 291 struct vbdev_delay *delay_node = SPDK_CONTAINEROF(bdev_io->bdev, struct vbdev_delay, delay_bdev); 292 struct delay_io_channel *delay_ch = spdk_io_channel_get_ctx(ch); 293 struct delay_bdev_io *io_ctx = (struct delay_bdev_io *)bdev_io->driver_ctx; 294 int rc = 0; 295 bool is_p99; 296 297 is_p99 = rand_r(&delay_ch->rand_seed) % 100 == 0 ? true : false; 298 299 io_ctx->ch = ch; 300 io_ctx->type = DELAY_NONE; 301 302 switch (bdev_io->type) { 303 case SPDK_BDEV_IO_TYPE_READ: 304 io_ctx->type = is_p99 ? DELAY_P99_READ : DELAY_AVG_READ; 305 spdk_bdev_io_get_buf(bdev_io, delay_read_get_buf_cb, 306 bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen); 307 break; 308 case SPDK_BDEV_IO_TYPE_WRITE: 309 io_ctx->type = is_p99 ? DELAY_P99_WRITE : DELAY_AVG_WRITE; 310 rc = spdk_bdev_writev_blocks(delay_node->base_desc, delay_ch->base_ch, bdev_io->u.bdev.iovs, 311 bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, 312 bdev_io->u.bdev.num_blocks, _delay_complete_io, 313 bdev_io); 314 break; 315 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 316 rc = spdk_bdev_write_zeroes_blocks(delay_node->base_desc, delay_ch->base_ch, 317 bdev_io->u.bdev.offset_blocks, 318 bdev_io->u.bdev.num_blocks, 319 _delay_complete_io, bdev_io); 320 break; 321 case SPDK_BDEV_IO_TYPE_UNMAP: 322 rc = spdk_bdev_unmap_blocks(delay_node->base_desc, delay_ch->base_ch, 323 bdev_io->u.bdev.offset_blocks, 324 bdev_io->u.bdev.num_blocks, 325 _delay_complete_io, bdev_io); 326 break; 327 case SPDK_BDEV_IO_TYPE_FLUSH: 328 rc = spdk_bdev_flush_blocks(delay_node->base_desc, delay_ch->base_ch, 329 bdev_io->u.bdev.offset_blocks, 330 bdev_io->u.bdev.num_blocks, 331 _delay_complete_io, bdev_io); 332 break; 333 case SPDK_BDEV_IO_TYPE_RESET: 334 rc = spdk_bdev_reset(delay_node->base_desc, delay_ch->base_ch, 335 _delay_complete_io, bdev_io); 336 break; 337 default: 338 SPDK_ERRLOG("delay: unknown I/O type %d\n", bdev_io->type); 339 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 340 return; 341 } 342 if (rc != 0) { 343 if (rc == -ENOMEM) { 344 SPDK_ERRLOG("No memory, start to queue io for delay.\n"); 345 vbdev_delay_queue_io(bdev_io); 346 } else { 347 SPDK_ERRLOG("ERROR on bdev_io submission!\n"); 348 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 349 } 350 } 351 } 352 353 static bool 354 vbdev_delay_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type) 355 { 356 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 357 358 return spdk_bdev_io_type_supported(delay_node->base_bdev, io_type); 359 } 360 361 static struct spdk_io_channel * 362 vbdev_delay_get_io_channel(void *ctx) 363 { 364 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 365 struct spdk_io_channel *delay_ch = NULL; 366 367 delay_ch = spdk_get_io_channel(delay_node); 368 369 return delay_ch; 370 } 371 372 static void 373 _delay_write_conf_values(struct vbdev_delay *delay_node, struct spdk_json_write_ctx *w) 374 { 375 spdk_json_write_named_string(w, "name", spdk_bdev_get_name(&delay_node->delay_bdev)); 376 spdk_json_write_named_string(w, "base_bdev_name", spdk_bdev_get_name(delay_node->base_bdev)); 377 spdk_json_write_named_int64(w, "avg_read_latency", 378 delay_node->average_read_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 379 spdk_json_write_named_int64(w, "p99_read_latency", 380 delay_node->p99_read_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 381 spdk_json_write_named_int64(w, "avg_write_latency", 382 delay_node->average_write_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 383 spdk_json_write_named_int64(w, "p99_write_latency", 384 delay_node->p99_write_latency_ticks * SPDK_SEC_TO_USEC / spdk_get_ticks_hz()); 385 } 386 387 static int 388 vbdev_delay_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) 389 { 390 struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; 391 392 spdk_json_write_name(w, "delay"); 393 spdk_json_write_object_begin(w); 394 _delay_write_conf_values(delay_node, w); 395 spdk_json_write_object_end(w); 396 397 return 0; 398 } 399 400 /* This is used to generate JSON that can configure this module to its current state. */ 401 static int 402 vbdev_delay_config_json(struct spdk_json_write_ctx *w) 403 { 404 struct vbdev_delay *delay_node; 405 406 TAILQ_FOREACH(delay_node, &g_delay_nodes, link) { 407 spdk_json_write_object_begin(w); 408 spdk_json_write_named_string(w, "method", "bdev_delay_create"); 409 spdk_json_write_named_object_begin(w, "params"); 410 _delay_write_conf_values(delay_node, w); 411 spdk_json_write_object_end(w); 412 } 413 return 0; 414 } 415 416 /* We provide this callback for the SPDK channel code to create a channel using 417 * the channel struct we provided in our module get_io_channel() entry point. Here 418 * we get and save off an underlying base channel of the device below us so that 419 * we can communicate with the base bdev on a per channel basis. If we needed 420 * our own poller for this vbdev, we'd register it here. 421 */ 422 static int 423 delay_bdev_ch_create_cb(void *io_device, void *ctx_buf) 424 { 425 struct delay_io_channel *delay_ch = ctx_buf; 426 struct vbdev_delay *delay_node = io_device; 427 428 STAILQ_INIT(&delay_ch->avg_read_io); 429 STAILQ_INIT(&delay_ch->p99_read_io); 430 STAILQ_INIT(&delay_ch->avg_write_io); 431 STAILQ_INIT(&delay_ch->p99_write_io); 432 433 delay_ch->io_poller = spdk_poller_register(_delay_finish_io, delay_ch, 0); 434 delay_ch->base_ch = spdk_bdev_get_io_channel(delay_node->base_desc); 435 delay_ch->rand_seed = time(NULL); 436 437 return 0; 438 } 439 440 /* We provide this callback for the SPDK channel code to destroy a channel 441 * created with our create callback. We just need to undo anything we did 442 * when we created. If this bdev used its own poller, we'd unregsiter it here. 443 */ 444 static void 445 delay_bdev_ch_destroy_cb(void *io_device, void *ctx_buf) 446 { 447 struct delay_io_channel *delay_ch = ctx_buf; 448 449 spdk_poller_unregister(&delay_ch->io_poller); 450 spdk_put_io_channel(delay_ch->base_ch); 451 } 452 453 /* Create the delay association from the bdev and vbdev name and insert 454 * on the global list. */ 455 static int 456 vbdev_delay_insert_association(const char *bdev_name, const char *vbdev_name, 457 uint64_t avg_read_latency, uint64_t p99_read_latency, 458 uint64_t avg_write_latency, uint64_t p99_write_latency) 459 { 460 struct bdev_association *assoc; 461 462 TAILQ_FOREACH(assoc, &g_bdev_associations, link) { 463 if (strcmp(vbdev_name, assoc->vbdev_name) == 0) { 464 SPDK_ERRLOG("delay bdev %s already exists\n", vbdev_name); 465 return -EEXIST; 466 } 467 } 468 469 assoc = calloc(1, sizeof(struct bdev_association)); 470 if (!assoc) { 471 SPDK_ERRLOG("could not allocate bdev_association\n"); 472 return -ENOMEM; 473 } 474 475 assoc->bdev_name = strdup(bdev_name); 476 if (!assoc->bdev_name) { 477 SPDK_ERRLOG("could not allocate assoc->bdev_name\n"); 478 free(assoc); 479 return -ENOMEM; 480 } 481 482 assoc->vbdev_name = strdup(vbdev_name); 483 if (!assoc->vbdev_name) { 484 SPDK_ERRLOG("could not allocate assoc->vbdev_name\n"); 485 free(assoc->bdev_name); 486 free(assoc); 487 return -ENOMEM; 488 } 489 490 assoc->avg_read_latency = avg_read_latency; 491 assoc->p99_read_latency = p99_read_latency; 492 assoc->avg_write_latency = avg_write_latency; 493 assoc->p99_write_latency = p99_write_latency; 494 495 TAILQ_INSERT_TAIL(&g_bdev_associations, assoc, link); 496 497 return 0; 498 } 499 500 int 501 vbdev_delay_update_latency_value(char *delay_name, uint64_t latency_us, enum delay_io_type type) 502 { 503 struct spdk_bdev *delay_bdev; 504 struct vbdev_delay *delay_node; 505 uint64_t ticks_mhz = spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; 506 507 delay_bdev = spdk_bdev_get_by_name(delay_name); 508 if (delay_bdev == NULL) { 509 return -ENODEV; 510 } else if (delay_bdev->module != &delay_if) { 511 return -EINVAL; 512 } 513 514 delay_node = SPDK_CONTAINEROF(delay_bdev, struct vbdev_delay, delay_bdev); 515 516 switch (type) { 517 case DELAY_AVG_READ: 518 delay_node->average_read_latency_ticks = ticks_mhz * latency_us; 519 break; 520 case DELAY_AVG_WRITE: 521 delay_node->average_write_latency_ticks = ticks_mhz * latency_us; 522 break; 523 case DELAY_P99_READ: 524 delay_node->p99_read_latency_ticks = ticks_mhz * latency_us; 525 break; 526 case DELAY_P99_WRITE: 527 delay_node->p99_write_latency_ticks = ticks_mhz * latency_us; 528 break; 529 default: 530 return -EINVAL; 531 } 532 533 return 0; 534 } 535 536 static int 537 vbdev_delay_init(void) 538 { 539 /* Not allowing for .ini style configuration. */ 540 return 0; 541 } 542 543 static void 544 vbdev_delay_finish(void) 545 { 546 struct bdev_association *assoc; 547 548 while ((assoc = TAILQ_FIRST(&g_bdev_associations))) { 549 TAILQ_REMOVE(&g_bdev_associations, assoc, link); 550 free(assoc->bdev_name); 551 free(assoc->vbdev_name); 552 free(assoc); 553 } 554 } 555 556 static int 557 vbdev_delay_get_ctx_size(void) 558 { 559 return sizeof(struct delay_bdev_io); 560 } 561 562 static void 563 vbdev_delay_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 564 { 565 /* No config per bdev needed */ 566 } 567 568 /* When we register our bdev this is how we specify our entry points. */ 569 static const struct spdk_bdev_fn_table vbdev_delay_fn_table = { 570 .destruct = vbdev_delay_destruct, 571 .submit_request = vbdev_delay_submit_request, 572 .io_type_supported = vbdev_delay_io_type_supported, 573 .get_io_channel = vbdev_delay_get_io_channel, 574 .dump_info_json = vbdev_delay_dump_info_json, 575 .write_config_json = vbdev_delay_write_config_json, 576 }; 577 578 /* Called when the underlying base bdev goes away. */ 579 static void 580 vbdev_delay_base_bdev_hotremove_cb(void *ctx) 581 { 582 struct vbdev_delay *delay_node, *tmp; 583 struct spdk_bdev *bdev_find = ctx; 584 585 TAILQ_FOREACH_SAFE(delay_node, &g_delay_nodes, link, tmp) { 586 if (bdev_find == delay_node->base_bdev) { 587 spdk_bdev_unregister(&delay_node->delay_bdev, NULL, NULL); 588 } 589 } 590 } 591 592 /* Create and register the delay vbdev if we find it in our list of bdev names. 593 * This can be called either by the examine path or RPC method. 594 */ 595 static int 596 vbdev_delay_register(struct spdk_bdev *bdev) 597 { 598 struct bdev_association *assoc; 599 struct vbdev_delay *delay_node; 600 uint64_t ticks_mhz = spdk_get_ticks_hz() / SPDK_SEC_TO_USEC; 601 int rc = 0; 602 603 /* Check our list of names from config versus this bdev and if 604 * there's a match, create the delay_node & bdev accordingly. 605 */ 606 TAILQ_FOREACH(assoc, &g_bdev_associations, link) { 607 if (strcmp(assoc->bdev_name, bdev->name) != 0) { 608 continue; 609 } 610 611 delay_node = calloc(1, sizeof(struct vbdev_delay)); 612 if (!delay_node) { 613 rc = -ENOMEM; 614 SPDK_ERRLOG("could not allocate delay_node\n"); 615 break; 616 } 617 618 /* The base bdev that we're attaching to. */ 619 delay_node->base_bdev = bdev; 620 delay_node->delay_bdev.name = strdup(assoc->vbdev_name); 621 if (!delay_node->delay_bdev.name) { 622 rc = -ENOMEM; 623 SPDK_ERRLOG("could not allocate delay_bdev name\n"); 624 free(delay_node); 625 break; 626 } 627 delay_node->delay_bdev.product_name = "delay"; 628 629 delay_node->delay_bdev.write_cache = bdev->write_cache; 630 delay_node->delay_bdev.required_alignment = bdev->required_alignment; 631 delay_node->delay_bdev.optimal_io_boundary = bdev->optimal_io_boundary; 632 delay_node->delay_bdev.blocklen = bdev->blocklen; 633 delay_node->delay_bdev.blockcnt = bdev->blockcnt; 634 635 delay_node->delay_bdev.ctxt = delay_node; 636 delay_node->delay_bdev.fn_table = &vbdev_delay_fn_table; 637 delay_node->delay_bdev.module = &delay_if; 638 639 /* Store the number of ticks you need to add to get the I/O expiration time. */ 640 delay_node->average_read_latency_ticks = ticks_mhz * assoc->avg_read_latency; 641 delay_node->p99_read_latency_ticks = ticks_mhz * assoc->p99_read_latency; 642 delay_node->average_write_latency_ticks = ticks_mhz * assoc->avg_write_latency; 643 delay_node->p99_write_latency_ticks = ticks_mhz * assoc->p99_write_latency; 644 645 spdk_io_device_register(delay_node, delay_bdev_ch_create_cb, delay_bdev_ch_destroy_cb, 646 sizeof(struct delay_io_channel), 647 assoc->vbdev_name); 648 649 rc = spdk_bdev_open(bdev, true, vbdev_delay_base_bdev_hotremove_cb, 650 bdev, &delay_node->base_desc); 651 if (rc) { 652 SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev)); 653 goto error_unregister; 654 } 655 656 rc = spdk_bdev_module_claim_bdev(bdev, delay_node->base_desc, delay_node->delay_bdev.module); 657 if (rc) { 658 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(bdev)); 659 goto error_close; 660 } 661 662 rc = spdk_bdev_register(&delay_node->delay_bdev); 663 if (rc) { 664 SPDK_ERRLOG("could not register delay_bdev\n"); 665 spdk_bdev_module_release_bdev(delay_node->base_bdev); 666 goto error_close; 667 } 668 669 TAILQ_INSERT_TAIL(&g_delay_nodes, delay_node, link); 670 } 671 672 return rc; 673 674 error_close: 675 spdk_bdev_close(delay_node->base_desc); 676 error_unregister: 677 spdk_io_device_unregister(delay_node, NULL); 678 free(delay_node->delay_bdev.name); 679 free(delay_node); 680 return rc; 681 } 682 683 int 684 create_delay_disk(const char *bdev_name, const char *vbdev_name, uint64_t avg_read_latency, 685 uint64_t p99_read_latency, uint64_t avg_write_latency, uint64_t p99_write_latency) 686 { 687 struct spdk_bdev *bdev = NULL; 688 int rc = 0; 689 690 if (p99_read_latency < avg_read_latency || p99_write_latency < avg_write_latency) { 691 SPDK_ERRLOG("Unable to create a delay bdev where p99 latency is less than average latency.\n"); 692 return -EINVAL; 693 } 694 695 rc = vbdev_delay_insert_association(bdev_name, vbdev_name, avg_read_latency, p99_read_latency, 696 avg_write_latency, p99_write_latency); 697 if (rc) { 698 return rc; 699 } 700 701 bdev = spdk_bdev_get_by_name(bdev_name); 702 if (!bdev) { 703 return 0; 704 } 705 706 return vbdev_delay_register(bdev); 707 } 708 709 void 710 delete_delay_disk(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg) 711 { 712 struct bdev_association *assoc; 713 714 if (!bdev || bdev->module != &delay_if) { 715 cb_fn(cb_arg, -ENODEV); 716 return; 717 } 718 719 TAILQ_FOREACH(assoc, &g_bdev_associations, link) { 720 if (strcmp(assoc->vbdev_name, bdev->name) == 0) { 721 TAILQ_REMOVE(&g_bdev_associations, assoc, link); 722 free(assoc->bdev_name); 723 free(assoc->vbdev_name); 724 free(assoc); 725 break; 726 } 727 } 728 729 spdk_bdev_unregister(bdev, cb_fn, cb_arg); 730 } 731 732 static void 733 vbdev_delay_examine(struct spdk_bdev *bdev) 734 { 735 vbdev_delay_register(bdev); 736 737 spdk_bdev_module_examine_done(&delay_if); 738 } 739 740 SPDK_LOG_REGISTER_COMPONENT("vbdev_delay", SPDK_LOG_VBDEV_DELAY) 741